JITFunctionInfo *JITImpl:: getJITFunctionOrStubImpl(JITCoreInfo &coreInfo, uint32_t pc) { JITFunctionInfo *&info = coreInfo.functionMap[pc]; if (info) return info; LLVMBasicBlockRef savedInsertPoint = LLVMGetInsertBlock(builder); LLVMValueRef f = LLVMAddFunction(module, "", jitFunctionType); LLVMSetFunctionCallConv(f, LLVMFastCallConv); LLVMBasicBlockRef entryBB = LLVMAppendBasicBlock(f, "entry"); LLVMPositionBuilderAtEnd(builder, entryBB); LLVMValueRef args[] = { LLVMGetParam(f, 0) }; LLVMValueRef call = LLVMBuildCall(builder, functions.jitStubImpl, args, 1, ""); LLVMBuildRet(builder, call); if (DEBUG_JIT) { LLVMDumpValue(f); LLVMVerifyFunction(f, LLVMAbortProcessAction); } JITInstructionFunction_t code = reinterpret_cast<JITInstructionFunction_t>( LLVMGetPointerToGlobal(executionEngine, f)); info = new JITFunctionInfo(pc, f, code, true); LLVMPositionBuilderAtEnd(builder, savedInsertPoint); return info; }
/** * Same as LLVMDumpValue, but through our debugging channels. */ extern "C" void lp_debug_dump_value(LLVMValueRef value) { #if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBEDDED) raw_debug_ostream os; llvm::unwrap(value)->print(os); os.flush(); #else LLVMDumpValue(value); #endif }
JITInstructionFunction_t JITImpl::getFunctionThunk(JITFunctionInfo &info) { LLVMValueRef f = LLVMAddFunction(module, "", jitFunctionType); LLVMValueRef thread = LLVMGetParam(f, 0); LLVMBasicBlockRef entryBB = LLVMAppendBasicBlock(f, "entry"); LLVMPositionBuilderAtEnd(builder, entryBB); LLVMValueRef args[] = { thread }; LLVMValueRef call = LLVMBuildCall(builder, info.value, args, 1, ""); LLVMSetTailCall(call, true); LLVMSetInstructionCallConv(call, LLVMFastCallConv); LLVMBuildRet(builder, call); if (DEBUG_JIT) { LLVMDumpValue(f); LLVMVerifyFunction(f, LLVMAbortProcessAction); } return reinterpret_cast<JITInstructionFunction_t>( LLVMGetPointerToGlobal(executionEngine, f)); }
static void llvm_emit_tex( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { struct gallivm_state * gallivm = bld_base->base.gallivm; LLVMValueRef args[7]; unsigned c, sampler_src; struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { switch (emit_data->inst->Instruction.Opcode) { case TGSI_OPCODE_TXQ: { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->uses_tex_buffers = true; bool isEgPlus = (ctx->chip_class >= EVERGREEN); LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, isEgPlus ? 0 : 1); LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER); if (!isEgPlus) { LLVMValueRef maskval[4] = { lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 2), lp_build_const_int32(gallivm, 3), lp_build_const_int32(gallivm, 0), }; LLVMValueRef mask = LLVMConstVector(maskval, 4); cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval, mask, ""); } emit_data->output[0] = cvecval; return; } case TGSI_OPCODE_TXF: { args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""); args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS); emit_data->output[0] = build_intrinsic(gallivm->builder, "llvm.R600.load.texbuf", emit_data->dst_type, args, 2, LLVMReadNoneAttribute); if (ctx->chip_class >= EVERGREEN) return; ctx->uses_tex_buffers = true; LLVMDumpValue(emit_data->output[0]); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4), ""); LLVMValueRef Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 0), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildBitCast(gallivm->builder, Mask, LLVMVectorType(bld_base->base.int_elem_type, 4), ""); emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND, emit_data->output[0], Mask); LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder, emit_data->output[0], lp_build_const_int32(gallivm, 3), ""); Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildExtractElement(gallivm->builder, Mask, lp_build_const_int32(gallivm, 0), ""); Mask = LLVMBuildBitCast(gallivm->builder, Mask, bld_base->base.int_elem_type, ""); WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR, WComponent, Mask); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), ""); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), ""); } return; default: break; } } if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX || emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) { LLVMValueRef Vector[4] = { LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 1), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 2), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 3), ""), }; switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); break; case TGSI_TEXTURE_1D: Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); break; default: break; } args[0] = lp_build_gather_values(gallivm, Vector, 4); } else { args[0] = emit_data->args[0]; } assert(emit_data->arg_count + 2 <= Elements(args)); for (c = 1; c < emit_data->arg_count; ++c) args[c] = emit_data->args[c]; if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) { args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), ""); args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), ""); args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), ""); } sampler_src = emit_data->inst->Instruction.NumSrcRegs-1; args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF && (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D); break; case TGSI_TEXTURE_2D_ARRAY_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY); break; default: break; } if (ctx->has_compressed_msaa_texturing) { LLVMValueRef ldptr_args[10] = { args[0], // Coord args[1], // Offset X args[2], // Offset Y args[3], // Offset Z args[4], args[5], lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1) }; LLVMValueRef ptr = build_intrinsic(gallivm->builder, "llvm.R600.ldptr", emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute); LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0], lp_build_const_int32(gallivm, 3), ""); Tmp = LLVMBuildMul(gallivm->builder, Tmp, lp_build_const_int32(gallivm, 4), ""); LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr, lp_build_const_int32(gallivm, 0), ""); ResX = LLVMBuildBitCast(gallivm->builder, ResX, bld_base->base.int_elem_type, ""); Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, ""); Tmp = LLVMBuildAnd(gallivm->builder, Tmp, lp_build_const_int32(gallivm, 0xF), ""); args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp, lp_build_const_int32(gallivm, 3), ""); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); } } emit_data->output[0] = build_intrinsic(gallivm->builder, action->intr_name, emit_data->dst_type, args, c, LLVMReadNoneAttribute); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ && ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) if (emit_data->inst->Dst[0].Register.WriteMask & 4) { LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0); LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder, llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER), lp_build_const_int32(gallivm, 0), ""); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), ""); struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->has_txq_cube_array_z_comp = true; } }
/** * Generate the runtime callable function for the whole fragment pipeline. * Note that the function which we generate operates on a block of 16 * pixels at at time. The block contains 2x2 quads. Each quad contains * 2x2 pixels. */ static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, unsigned do_tri_test) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[14]; LLVMTypeRef func_type; LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; LLVMValueRef function; unsigned num_fs; unsigned i; unsigned chan; unsigned cbuf; /* TODO: actually pick these based on the fs and color buffer * characteristics. */ memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ fs_type.length = 4; /* 4 elements per vector */ num_fs = 4; /* number of quads per block */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ blend_type.width = 8; /* 8-bit ubyte values */ blend_type.length = 16; /* 16 elements per vector */ /* * Generate the function prototype. Any change here must be reflected in * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. */ fs_elem_type = lp_build_elem_type(fs_type); fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ arg_types[8] = LLVMInt32Type(); /* c0 */ arg_types[9] = LLVMInt32Type(); /* c1 */ arg_types[10] = LLVMInt32Type(); /* c2 */ /* Note: the step arrays are built as int32[16] but we interpret * them here as int32_vec4[4]. */ arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); variant->function[do_tri_test] = function; /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); context_ptr = LLVMGetParam(function, 0); x = LLVMGetParam(function, 1); y = LLVMGetParam(function, 2); a0_ptr = LLVMGetParam(function, 3); dadx_ptr = LLVMGetParam(function, 4); dady_ptr = LLVMGetParam(function, 5); color_ptr_ptr = LLVMGetParam(function, 6); depth_ptr = LLVMGetParam(function, 7); c0 = LLVMGetParam(function, 8); c1 = LLVMGetParam(function, 9); c2 = LLVMGetParam(function, 10); step0_ptr = LLVMGetParam(function, 11); step1_ptr = LLVMGetParam(function, 12); step2_ptr = LLVMGetParam(function, 13); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); lp_build_name(y, "y"); lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); lp_build_name(color_ptr_ptr, "color_ptr"); lp_build_name(depth_ptr, "depth"); lp_build_name(c0, "c0"); lp_build_name(c1, "c1"); lp_build_name(c2, "c2"); lp_build_name(step0_ptr, "step0"); lp_build_name(step1_ptr, "step1"); lp_build_name(step2_ptr, "step2"); /* * Function body */ block = LLVMAppendBasicBlock(function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); generate_pos0(builder, x, y, &x0, &y0); lp_build_interp_soa_init(&interp, shader->base.tokens, key->flatshade, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x0, y0); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; int cbuf; if(i != 0) lp_build_interp_soa_update(&interp, i); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, builder, fs_type, context_ptr, i, &interp, sampler, &fs_mask[i], /* output */ out_color, depth_ptr_i, do_tri_test, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[cbuf][chan][i] = out_color[cbuf][chan]; } sampler->destroy(sampler); /* Loop over color outputs / color buffers to do blending. */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { LLVMValueRef color_ptr; LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); /* * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { lp_build_conv(builder, fs_type, blend_type, fs_out_color[cbuf][chan], num_fs, &blend_in_color[chan], 1); lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } lp_build_conv_mask(builder, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), ""); lp_build_name(color_ptr, "color_ptr%d", cbuf); /* * Blending. */ generate_blend(&key->blend, builder, blend_type, context_ptr, blend_mask, blend_in_color, color_ptr); } LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); /* Verify the LLVM IR. If invalid, dump and abort */ #ifdef DEBUG if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { if (1) LLVMDumpValue(function); abort(); } #endif /* Apply optimizations to LLVM IR */ if (1) LLVMRunFunctionPassManager(screen->pass, function); if (LP_DEBUG & DEBUG_JIT) { /* Print the LLVM IR to stderr */ LLVMDumpValue(function); debug_printf("\n"); } /* * Translate the LLVM IR into machine code. */ variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); if (LP_DEBUG & DEBUG_ASM) lp_disassemble(variant->jit_function[do_tri_test]); }
/// Try and compile a fragment starting at the specified address. Returns /// true if successful setting \a nextAddress to the first instruction after /// the fragment. If unsuccessful returns false and sets \a nextAddress to the /// address after the current function. \a endOfBlock is set to true if the /// next address is in a new basic block. bool JITImpl:: compileOneFragment(Core &core, JITCoreInfo &coreInfo, uint32_t startPc, bool &endOfBlock, uint32_t &pcAfterFragment) { assert(initialized); resetPerFunctionState(); std::map<uint32_t,JITFunctionInfo*>::iterator infoIt = coreInfo.functionMap.find(startPc); JITFunctionInfo *info = (infoIt == coreInfo.functionMap.end()) ? 0 : infoIt->second; if (info && !info->isStub) { endOfBlock = true; return false; } std::vector<InstructionOpcode> opcode; std::vector<Operands> operands; if (!getFragmentToCompile(core, startPc, opcode, operands, endOfBlock, pcAfterFragment)) { return false; } std::queue<std::pair<uint32_t,MemoryCheck*> > checks; placeMemoryChecks(opcode, operands, checks); LLVMValueRef f; if (info) { f = info->value; info->func = 0; info->isStub = false; deleteFunctionBody(f); } else { info = new JITFunctionInfo(startPc); coreInfo.functionMap.insert(std::make_pair(startPc, info)); // Create function to contain the code we are about to add. info->value = f = LLVMAddFunction(module, "", jitFunctionType); LLVMSetFunctionCallConv(f, LLVMFastCallConv); } threadParam = LLVMGetParam(f, 0); LLVMValueRef ramBase = LLVMConstInt(LLVMInt32Type(), core.ram_base, false); ramSizeLog2Param = LLVMConstInt(LLVMInt32Type(), core.ramSizeLog2, false); LLVMBasicBlockRef entryBB = LLVMAppendBasicBlock(f, "entry"); LLVMPositionBuilderAtEnd(builder, entryBB); uint32_t pc = startPc; bool needsReturn = true; for (unsigned i = 0, e = opcode.size(); i != e; ++i) { InstructionOpcode opc = opcode[i]; const Operands &ops = operands[i]; InstructionProperties *properties = &instructionProperties[opc]; uint32_t nextPc = pc + properties->size / 2; emitMemoryChecks(i, checks); // Lookup function to call. LLVMValueRef callee = LLVMGetNamedFunction(module, properties->function); assert(callee && "Function for instruction not found in module"); LLVMTypeRef calleeType = LLVMGetElementType(LLVMTypeOf(callee)); const unsigned fixedArgs = 4; const unsigned maxOperands = 6; unsigned numArgs = properties->getNumExplicitOperands() + fixedArgs; assert(LLVMCountParamTypes(calleeType) == numArgs); LLVMTypeRef paramTypes[fixedArgs + maxOperands]; assert(numArgs <= (fixedArgs + maxOperands)); LLVMGetParamTypes(calleeType, paramTypes); // Build call. LLVMValueRef args[fixedArgs + maxOperands]; args[0] = threadParam; args[1] = LLVMConstInt(paramTypes[1], nextPc, false); args[2] = ramBase; args[3] = ramSizeLog2Param; for (unsigned i = fixedArgs; i < numArgs; i++) { uint32_t value = properties->getNumExplicitOperands() <= 3 ? ops.ops[i - fixedArgs] : ops.lops[i - fixedArgs]; args[i] = LLVMConstInt(paramTypes[i], value, false); } LLVMValueRef call = emitCallToBeInlined(callee, args, numArgs); checkReturnValue(call, *properties); if (properties->mayBranch() && properties->function && emitJumpToNextFragment(opc, ops, coreInfo, nextPc, info)) { needsReturn = false; } pc = nextPc; } assert(checks.empty() && "Not all checks emitted"); if (needsReturn) { LLVMValueRef args[] = { threadParam }; emitCallToBeInlined(functions.jitUpdateExecutionFrequency, args, 1); // Build return. LLVMBuildRet(builder, LLVMConstInt(LLVMGetReturnType(jitFunctionType), JIT_RETURN_CONTINUE, 0)); } // Add incoming phi values. if (earlyReturnBB) { LLVMAddIncoming(earlyReturnPhi, &earlyReturnIncomingValues[0], &earlyReturnIncomingBlocks[0], earlyReturnIncomingValues.size()); } if (DEBUG_JIT) { LLVMDumpValue(f); LLVMVerifyFunction(f, LLVMAbortProcessAction); } // Optimize. for (std::vector<LLVMValueRef>::iterator it = calls.begin(), e = calls.end(); it != e; ++it) { LLVMExtraInlineFunction(*it); } LLVMRunFunctionPassManager(FPM, f); if (DEBUG_JIT) { LLVMDumpValue(f); } // Compile. JITInstructionFunction_t compiledFunction = reinterpret_cast<JITInstructionFunction_t>( LLVMRecompileAndRelinkFunction(executionEngine, f)); info->isStub = false; info->func = compiledFunction; core.setOpcode(startPc, getFunctionThunk(*info), (pc - startPc) * 2); return true; }
/** * Generate the runtime callable function for the whole fragment pipeline. */ static struct lp_fragment_shader_variant * generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader_variant *variant; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[9]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; LLVMValueRef mask_ptr; LLVMValueRef color_ptr; LLVMValueRef depth_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; unsigned num_fs; unsigned i; unsigned chan; #ifdef DEBUG tgsi_dump(shader->base.tokens, 0); if(key->depth.enabled) { debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); } if(key->alpha.enabled) { debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); } if(key->blend.logicop_enable) { debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); } else if(key->blend.blend_enable) { debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { if(key->sampler[i].format) { debug_printf("sampler[%u] = \n", i); debug_printf(" .format = %s\n", pf_name(key->sampler[i].format)); debug_printf(" .target = %s\n", debug_dump_tex_target(key->sampler[i].target, TRUE)); debug_printf(" .pot = %u %u %u\n", key->sampler[i].pot_width, key->sampler[i].pot_height, key->sampler[i].pot_depth); debug_printf(" .wrap = %s %s %s\n", debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); debug_printf(" .min_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); debug_printf(" .min_mip_filter = %s\n", debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); debug_printf(" .mag_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); if(key->sampler[i].compare_mode) debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); } } #endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) return NULL; variant->shader = shader; memcpy(&variant->key, key, sizeof *key); /* TODO: actually pick these based on the fs and color buffer * characteristics. */ memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ fs_type.length = 4; /* 4 element per vector */ num_fs = 4; memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ blend_type.width = 8; /* 8-bit ubyte values */ blend_type.length = 16; /* 16 elements per vector */ /* * Generate the function prototype. Any change here must be reflected in * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. */ fs_elem_type = lp_build_elem_type(fs_type); fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */ arg_types[7] = LLVMPointerType(blend_vec_type, 0); /* color */ arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); variant->function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant->function, 0); x = LLVMGetParam(variant->function, 1); y = LLVMGetParam(variant->function, 2); a0_ptr = LLVMGetParam(variant->function, 3); dadx_ptr = LLVMGetParam(variant->function, 4); dady_ptr = LLVMGetParam(variant->function, 5); mask_ptr = LLVMGetParam(variant->function, 6); color_ptr = LLVMGetParam(variant->function, 7); depth_ptr = LLVMGetParam(variant->function, 8); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); lp_build_name(y, "y"); lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); lp_build_name(mask_ptr, "mask"); lp_build_name(color_ptr, "color"); lp_build_name(depth_ptr, "depth"); /* * Function body */ block = LLVMAppendBasicBlock(variant->function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); generate_pos0(builder, x, y, &x0, &y0); lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); #if 0 /* C texture sampling */ sampler = lp_c_sampler_soa_create(context_ptr); #else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); #endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[NUM_CHANNELS]; LLVMValueRef depth_ptr_i; if(i != 0) lp_build_interp_soa_update(&interp); fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, builder, fs_type, context_ptr, i, &interp, sampler, &fs_mask[i], out_color, depth_ptr_i); for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[chan][i] = out_color[chan]; } sampler->destroy(sampler); /* * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { lp_build_conv(builder, fs_type, blend_type, fs_out_color[chan], num_fs, &blend_in_color[chan], 1); lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]); } lp_build_conv_mask(builder, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); /* * Blending. */ generate_blend(&key->blend, builder, blend_type, context_ptr, blend_mask, blend_in_color, color_ptr); LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); /* * Translate the LLVM IR into machine code. */ if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { LLVMDumpValue(variant->function); abort(); } LLVMRunFunctionPassManager(screen->pass, variant->function); #ifdef DEBUG LLVMDumpValue(variant->function); debug_printf("\n"); #endif variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); #ifdef DEBUG lp_disassemble(variant->jit_function); #endif variant->next = shader->variants; shader->variants = variant; return variant; }