JITFunctionInfo *JITImpl:: getJITFunctionOrStubImpl(JITCoreInfo &coreInfo, uint32_t pc) { JITFunctionInfo *&info = coreInfo.functionMap[pc]; if (info) return info; LLVMBasicBlockRef savedInsertPoint = LLVMGetInsertBlock(builder); LLVMValueRef f = LLVMAddFunction(module, "", jitFunctionType); LLVMSetFunctionCallConv(f, LLVMFastCallConv); LLVMBasicBlockRef entryBB = LLVMAppendBasicBlock(f, "entry"); LLVMPositionBuilderAtEnd(builder, entryBB); LLVMValueRef args[] = { LLVMGetParam(f, 0) }; LLVMValueRef call = LLVMBuildCall(builder, functions.jitStubImpl, args, 1, ""); LLVMBuildRet(builder, call); if (DEBUG_JIT) { LLVMDumpValue(f); LLVMVerifyFunction(f, LLVMAbortProcessAction); } JITInstructionFunction_t code = reinterpret_cast<JITInstructionFunction_t>( LLVMGetPointerToGlobal(executionEngine, f)); info = new JITFunctionInfo(pc, f, code, true); LLVMPositionBuilderAtEnd(builder, savedInsertPoint); return info; }
SWIGEXPORT jlong JNICALL Java_org_jllvm_bindings_ExecutionEngineJNI_LLVMGetPointerToGlobal(JNIEnv *jenv, jclass jcls, jlong jarg1, jlong jarg2) { jlong jresult = 0 ; LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; LLVMValueRef arg2 = (LLVMValueRef) 0 ; void *result = 0 ; (void)jenv; (void)jcls; arg1 = *(LLVMExecutionEngineRef *)&jarg1; arg2 = *(LLVMValueRef *)&jarg2; result = (void *)LLVMGetPointerToGlobal(arg1,arg2); *(void **)&jresult = result; return jresult; }
void *jit_fun_ptr(const char *name, bool required) { if (using_jit) { LLVMValueRef fn; if (LLVMFindFunction(exec_engine, name, &fn)) { if (required) fatal("cannot find function %s", name); else return NULL; } return LLVMGetPointerToGlobal(exec_engine, fn); } else return jit_var_ptr(name, required); }
JITInstructionFunction_t JITImpl::getFunctionThunk(JITFunctionInfo &info) { LLVMValueRef f = LLVMAddFunction(module, "", jitFunctionType); LLVMValueRef thread = LLVMGetParam(f, 0); LLVMBasicBlockRef entryBB = LLVMAppendBasicBlock(f, "entry"); LLVMPositionBuilderAtEnd(builder, entryBB); LLVMValueRef args[] = { thread }; LLVMValueRef call = LLVMBuildCall(builder, info.value, args, 1, ""); LLVMSetTailCall(call, true); LLVMSetInstructionCallConv(call, LLVMFastCallConv); LLVMBuildRet(builder, call); if (DEBUG_JIT) { LLVMDumpValue(f); LLVMVerifyFunction(f, LLVMAbortProcessAction); } return reinterpret_cast<JITInstructionFunction_t>( LLVMGetPointerToGlobal(executionEngine, f)); }
func_pointer gallivm_jit_function(struct gallivm_state *gallivm, LLVMValueRef func) { void *code; func_pointer jit_func; assert(gallivm->compiled); assert(gallivm->engine); code = LLVMGetPointerToGlobal(gallivm->engine, func); assert(code); jit_func = pointer_to_func(code); if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } /* Free the function body to save memory */ lp_func_delete_body(func); return jit_func; }
void *jit_var_ptr(const char *name, bool required) { if (using_jit) { LLVMValueRef var = LLVMGetNamedGlobal(module, name); if (var == NULL) { if (required) fatal("cannot find global %s", name); else return NULL; } return LLVMGetPointerToGlobal(exec_engine, var); } else { dlerror(); // Clear any previous error char dlname[256]; jit_native_name(name, dlname, sizeof(dlname)); void *sym = dlsym(dl_handle, dlname); const char *error = dlerror(); if ((error != NULL) && required) fatal("%s", error); return sym; } }
ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, struct lp_type type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; LLVMExecutionEngineRef engine = NULL; LLVMModuleProviderRef provider = NULL; LLVMPassManagerRef pass = NULL; char *error = NULL; blend_test_ptr_t blend_test_ptr; boolean success; const unsigned n = LP_TEST_NUM_SAMPLES; int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; if(verbose >= 1) dump_blend_type(stdout, blend, mode, type); module = LLVMModuleCreateWithName("test"); func = add_blend_test(module, blend, mode, type); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); abort(); } LLVMDisposeMessage(error); provider = LLVMCreateModuleProviderForExistingModule(module); if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); } #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ LLVMAddConstantPropagationPass(pass); LLVMAddInstructionCombiningPass(pass); LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); LLVMRunPassManager(pass, module); #else (void)pass; #endif if(verbose >= 2) LLVMDumpModule(module); blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func); if(verbose >= 2) lp_disassemble(blend_test_ptr); success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; random_vec(type, src); random_vec(type, dst); random_vec(type, con); { double fsrc[LP_MAX_VECTOR_LENGTH]; double fdst[LP_MAX_VECTOR_LENGTH]; double fcon[LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH]; read_vec(type, src, fsrc); read_vec(type, dst, fdst); read_vec(type, con, fcon); for(j = 0; j < type.length; j += 4) compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); write_vec(type, ref, fref); } start_counter = rdtsc(); blend_test_ptr(src, dst, con, res); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; if(!compare_vec(type, res, ref)) { success = FALSE; if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "MISMATCH\n"); fprintf(stderr, " Src: "); dump_vec(stderr, type, src); fprintf(stderr, "\n"); fprintf(stderr, " Dst: "); dump_vec(stderr, type, dst); fprintf(stderr, "\n"); fprintf(stderr, " Con: "); dump_vec(stderr, type, con); fprintf(stderr, "\n"); fprintf(stderr, " Res: "); dump_vec(stderr, type, res); fprintf(stderr, "\n"); fprintf(stderr, " Ref: "); dump_vec(stderr, type, ref); fprintf(stderr, "\n"); } } if(mode == SoA) { const unsigned stride = type.length*type.width/8; ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; for(j = 0; j < 4; ++j) { random_vec(type, src + j*stride); random_vec(type, dst + j*stride); random_vec(type, con + j*stride); } { double fsrc[4]; double fdst[4]; double fcon[4]; double fref[4]; unsigned k; for(k = 0; k < type.length; ++k) { for(j = 0; j < 4; ++j) { fsrc[j] = read_elem(type, src + j*stride, k); fdst[j] = read_elem(type, dst + j*stride, k); fcon[j] = read_elem(type, con + j*stride, k); } compute_blend_ref(blend, fsrc, fdst, fcon, fref); for(j = 0; j < 4; ++j) write_elem(type, ref + j*stride, k, fref[j]); } } start_counter = rdtsc(); blend_test_ptr(src, dst, con, res); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; mismatch = FALSE; for (j = 0; j < 4; ++j) if(!compare_vec(type, res + j*stride, ref + j*stride)) mismatch = TRUE; if (mismatch) { success = FALSE; if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "MISMATCH\n"); for(j = 0; j < 4; ++j) { char channel = "RGBA"[j]; fprintf(stderr, " Src%c: ", channel); dump_vec(stderr, type, src + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Dst%c: ", channel); dump_vec(stderr, type, dst + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Con%c: ", channel); dump_vec(stderr, type, con + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Res%c: ", channel); dump_vec(stderr, type, res + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Ref%c: ", channel); dump_vec(stderr, type, ref + j*stride); fprintf(stderr, "\n"); } } } } /* * Unfortunately the output of cycle counter is not very reliable as it comes * -- sometimes we get outliers (due IRQs perhaps?) which are * better removed to avoid random or biased data. */ { double sum = 0.0, sum2 = 0.0; double avg, std; unsigned m; for(i = 0; i < n; ++i) { sum += cycles[i]; sum2 += cycles[i]*cycles[i]; } avg = sum/n; std = sqrtf((sum2 - n*avg*avg)/n); m = 0; sum = 0.0; for(i = 0; i < n; ++i) { if(fabs(cycles[i] - avg) <= 4.0*std) { sum += cycles[i]; ++m; } } cycles_avg = sum/m; } if(fp) write_tsv_row(fp, blend, mode, type, cycles_avg, success); if (!success) { if(verbose < 2) LLVMDumpModule(module); LLVMWriteBitcodeToFile(module, "blend.bc"); fprintf(stderr, "blend.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n"); abort(); } LLVMFreeMachineCodeForFunction(engine, func); LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); return success; }
/** * Generate the runtime callable function for the whole fragment pipeline. * Note that the function which we generate operates on a block of 16 * pixels at at time. The block contains 2x2 quads. Each quad contains * 2x2 pixels. */ static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, unsigned do_tri_test) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[14]; LLVMTypeRef func_type; LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type(); LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; LLVMValueRef function; unsigned num_fs; unsigned i; unsigned chan; unsigned cbuf; /* TODO: actually pick these based on the fs and color buffer * characteristics. */ memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ fs_type.length = 4; /* 4 elements per vector */ num_fs = 4; /* number of quads per block */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ blend_type.width = 8; /* 8-bit ubyte values */ blend_type.length = 16; /* 16 elements per vector */ /* * Generate the function prototype. Any change here must be reflected in * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. */ fs_elem_type = lp_build_elem_type(fs_type); fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ arg_types[8] = LLVMInt32Type(); /* c0 */ arg_types[9] = LLVMInt32Type(); /* c1 */ arg_types[10] = LLVMInt32Type(); /* c2 */ /* Note: the step arrays are built as int32[16] but we interpret * them here as int32_vec4[4]. */ arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */ arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */ arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); variant->function[do_tri_test] = function; /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); context_ptr = LLVMGetParam(function, 0); x = LLVMGetParam(function, 1); y = LLVMGetParam(function, 2); a0_ptr = LLVMGetParam(function, 3); dadx_ptr = LLVMGetParam(function, 4); dady_ptr = LLVMGetParam(function, 5); color_ptr_ptr = LLVMGetParam(function, 6); depth_ptr = LLVMGetParam(function, 7); c0 = LLVMGetParam(function, 8); c1 = LLVMGetParam(function, 9); c2 = LLVMGetParam(function, 10); step0_ptr = LLVMGetParam(function, 11); step1_ptr = LLVMGetParam(function, 12); step2_ptr = LLVMGetParam(function, 13); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); lp_build_name(y, "y"); lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); lp_build_name(color_ptr_ptr, "color_ptr"); lp_build_name(depth_ptr, "depth"); lp_build_name(c0, "c0"); lp_build_name(c1, "c1"); lp_build_name(c2, "c2"); lp_build_name(step0_ptr, "step0"); lp_build_name(step1_ptr, "step1"); lp_build_name(step2_ptr, "step2"); /* * Function body */ block = LLVMAppendBasicBlock(function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); generate_pos0(builder, x, y, &x0, &y0); lp_build_interp_soa_init(&interp, shader->base.tokens, key->flatshade, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x0, y0); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; int cbuf; if(i != 0) lp_build_interp_soa_update(&interp, i); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, builder, fs_type, context_ptr, i, &interp, sampler, &fs_mask[i], /* output */ out_color, depth_ptr_i, do_tri_test, c0, c1, c2, step0_ptr, step1_ptr, step2_ptr); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[cbuf][chan][i] = out_color[cbuf][chan]; } sampler->destroy(sampler); /* Loop over color outputs / color buffers to do blending. */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { LLVMValueRef color_ptr; LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); /* * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { lp_build_conv(builder, fs_type, blend_type, fs_out_color[cbuf][chan], num_fs, &blend_in_color[chan], 1); lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } lp_build_conv_mask(builder, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), ""); lp_build_name(color_ptr, "color_ptr%d", cbuf); /* * Blending. */ generate_blend(&key->blend, builder, blend_type, context_ptr, blend_mask, blend_in_color, color_ptr); } LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); /* Verify the LLVM IR. If invalid, dump and abort */ #ifdef DEBUG if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { if (1) LLVMDumpValue(function); abort(); } #endif /* Apply optimizations to LLVM IR */ if (1) LLVMRunFunctionPassManager(screen->pass, function); if (LP_DEBUG & DEBUG_JIT) { /* Print the LLVM IR to stderr */ LLVMDumpValue(function); debug_printf("\n"); } /* * Translate the LLVM IR into machine code. */ variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function); if (LP_DEBUG & DEBUG_ASM) lp_disassemble(variant->jit_function[do_tri_test]); }
PIPE_ALIGN_STACK static boolean test_round(unsigned verbose, FILE *fp) { LLVMModuleRef module = NULL; LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil; LLVMExecutionEngineRef engine = lp_build_engine; LLVMPassManagerRef pass = NULL; char *error = NULL; test_round_t round_func, trunc_func, floor_func, ceil_func; float unpacked[4]; unsigned packed; boolean success = TRUE; int i; module = LLVMModuleCreateWithName("test"); test_round = add_test(module, "round", lp_build_round); test_trunc = add_test(module, "trunc", lp_build_trunc); test_floor = add_test(module, "floor", lp_build_floor); test_ceil = add_test(module, "ceil", lp_build_ceil); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { printf("LLVMVerifyModule: %s\n", error); LLVMDumpModule(module); abort(); } LLVMDisposeMessage(error); #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ LLVMAddConstantPropagationPass(pass); LLVMAddInstructionCombiningPass(pass); LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); LLVMRunPassManager(pass, module); #else (void)pass; #endif round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round)); trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc)); floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor)); ceil_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_ceil)); memset(unpacked, 0, sizeof unpacked); packed = 0; if (0) LLVMDumpModule(module); for (i = 0; i < 3; i++) { v4sf xvals[3] = { {-10.0, -1, 0, 12.0}, {-1.5, -0.25, 1.25, 2.5}, {-0.99, -0.01, 0.01, 0.99} }; v4sf x = xvals[i]; v4sf y, ref; float *xp = (float *) &x; float *refp = (float *) &ref; printf("\n"); printv("x ", x); refp[0] = round(xp[0]); refp[1] = round(xp[1]); refp[2] = round(xp[2]); refp[3] = round(xp[3]); y = round_func(x); printv("C round(x) ", ref); printv("LLVM round(x)", y); compare(ref, y); refp[0] = trunc(xp[0]); refp[1] = trunc(xp[1]); refp[2] = trunc(xp[2]); refp[3] = trunc(xp[3]); y = trunc_func(x); printv("C trunc(x) ", ref); printv("LLVM trunc(x)", y); compare(ref, y); refp[0] = floor(xp[0]); refp[1] = floor(xp[1]); refp[2] = floor(xp[2]); refp[3] = floor(xp[3]); y = floor_func(x); printv("C floor(x) ", ref); printv("LLVM floor(x)", y); compare(ref, y); refp[0] = ceil(xp[0]); refp[1] = ceil(xp[1]); refp[2] = ceil(xp[2]); refp[3] = ceil(xp[3]); y = ceil_func(x); printv("C ceil(x) ", ref); printv("LLVM ceil(x) ", y); compare(ref, y); } LLVMFreeMachineCodeForFunction(engine, test_round); LLVMFreeMachineCodeForFunction(engine, test_trunc); LLVMFreeMachineCodeForFunction(engine, test_floor); LLVMFreeMachineCodeForFunction(engine, test_ceil); LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); return success; }
int main(int c, char **v) { LLVMContextRef *contexts; LLVMModuleRef *modules; char *error; const char *mode = "opt"; const char **filenames; unsigned numFiles; unsigned i; bool moreOptions; static int verboseFlag = 0; static int timingFlag = 0; static int disassembleFlag = 0; bool manyContexts = true; double beforeAll; if (c == 1) usage(); moreOptions = true; while (moreOptions) { static struct option longOptions[] = { {"verbose", no_argument, &verboseFlag, 1}, {"timing", no_argument, &timingFlag, 1}, {"disassemble", no_argument, &disassembleFlag, 1}, {"mode", required_argument, 0, 0}, {"contexts", required_argument, 0, 0}, {"help", no_argument, 0, 0} }; int optionIndex; int optionValue; optionValue = getopt_long(c, v, "", longOptions, &optionIndex); switch (optionValue) { case -1: moreOptions = false; break; case 0: { const char* thisOption = longOptions[optionIndex].name; if (!strcmp(thisOption, "help")) usage(); if (!strcmp(thisOption, "contexts")) { if (!strcasecmp(optarg, "one")) manyContexts = false; else if (!strcasecmp(optarg, "many")) manyContexts = true; else { fprintf(stderr, "Invalid argument for --contexts.\n"); exit(1); } break; } if (!strcmp(thisOption, "mode")) { mode = strdup(optarg); break; } break; } case '?': exit(0); break; default: printf("optionValue = %d\n", optionValue); abort(); break; } } LLVMLinkInMCJIT(); LLVMInitializeNativeTarget(); LLVMInitializeX86AsmPrinter(); LLVMInitializeX86Disassembler(); filenames = (const char **)(v + optind); numFiles = c - optind; contexts = malloc(sizeof(LLVMContextRef) * numFiles); modules = malloc(sizeof(LLVMModuleRef) * numFiles); if (manyContexts) { for (i = 0; i < numFiles; ++i) contexts[i] = LLVMContextCreate(); } else { LLVMContextRef context = LLVMContextCreate(); for (i = 0; i < numFiles; ++i) contexts[i] = context; } for (i = 0; i < numFiles; ++i) { LLVMMemoryBufferRef buffer; const char* filename = filenames[i]; if (LLVMCreateMemoryBufferWithContentsOfFile(filename, &buffer, &error)) { fprintf(stderr, "Error reading file %s: %s\n", filename, error); exit(1); } if (LLVMParseBitcodeInContext(contexts[i], buffer, modules + i, &error)) { fprintf(stderr, "Error parsing file %s: %s\n", filename, error); exit(1); } LLVMDisposeMemoryBuffer(buffer); if (verboseFlag) { printf("Module #%u (%s) after parsing:\n", i, filename); LLVMDumpModule(modules[i]); } } if (verboseFlag) printf("Generating code for modules...\n"); if (timingFlag) beforeAll = currentTime(); for (i = 0; i < numFiles; ++i) { LLVMModuleRef module; LLVMExecutionEngineRef engine; struct LLVMMCJITCompilerOptions options; LLVMValueRef value; LLVMPassManagerRef functionPasses = 0; LLVMPassManagerRef modulePasses = 0; double before; if (timingFlag) before = currentTime(); module = modules[i]; LLVMInitializeMCJITCompilerOptions(&options, sizeof(options)); options.OptLevel = 2; options.EnableFastISel = 0; options.MCJMM = LLVMCreateSimpleMCJITMemoryManager( 0, mmAllocateCodeSection, mmAllocateDataSection, mmApplyPermissions, mmDestroy); if (LLVMCreateMCJITCompilerForModule(&engine, module, &options, sizeof(options), &error)) { fprintf(stderr, "Error building MCJIT: %s\n", error); exit(1); } if (!strcasecmp(mode, "simple")) { modulePasses = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), modulePasses); LLVMAddConstantPropagationPass(modulePasses); LLVMAddInstructionCombiningPass(modulePasses); LLVMAddPromoteMemoryToRegisterPass(modulePasses); LLVMAddBasicAliasAnalysisPass(modulePasses); LLVMAddTypeBasedAliasAnalysisPass(modulePasses); LLVMAddGVNPass(modulePasses); LLVMAddCFGSimplificationPass(modulePasses); LLVMRunPassManager(modulePasses, module); } else if (!strcasecmp(mode, "opt")) { LLVMPassManagerBuilderRef passBuilder; passBuilder = LLVMPassManagerBuilderCreate(); LLVMPassManagerBuilderSetOptLevel(passBuilder, 2); LLVMPassManagerBuilderSetSizeLevel(passBuilder, 0); functionPasses = LLVMCreateFunctionPassManagerForModule(module); modulePasses = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), modulePasses); LLVMPassManagerBuilderPopulateFunctionPassManager(passBuilder, functionPasses); LLVMPassManagerBuilderPopulateModulePassManager(passBuilder, modulePasses); LLVMPassManagerBuilderDispose(passBuilder); LLVMInitializeFunctionPassManager(functionPasses); for (value = LLVMGetFirstFunction(module); value; value = LLVMGetNextFunction(value)) LLVMRunFunctionPassManager(functionPasses, value); LLVMFinalizeFunctionPassManager(functionPasses); LLVMRunPassManager(modulePasses, module); } else { fprintf(stderr, "Bad optimization mode: %s.\n", mode); fprintf(stderr, "Valid modes are: \"simple\" or \"opt\".\n"); exit(1); } if (verboseFlag) { printf("Module #%d (%s) after optimization:\n", i, filenames[i]); LLVMDumpModule(module); } for (value = LLVMGetFirstFunction(module); value; value = LLVMGetNextFunction(value)) { if (LLVMIsDeclaration(value)) continue; LLVMGetPointerToGlobal(engine, value); } if (functionPasses) LLVMDisposePassManager(functionPasses); if (modulePasses) LLVMDisposePassManager(modulePasses); LLVMDisposeExecutionEngine(engine); if (timingFlag) { double after = currentTime(); printf("Module #%d (%s) took %lf ms.\n", i, filenames[i], (after - before) * 1000); } } if (timingFlag) { double after = currentTime(); printf("Compilation took a total of %lf ms.\n", (after - beforeAll) * 1000); } if (disassembleFlag) { LLVMDisasmContextRef disassembler; struct MemorySection *section; disassembler = LLVMCreateDisasm("x86_64-apple-darwin", 0, 0, 0, symbolLookupCallback); if (!disassembler) { fprintf(stderr, "Error building disassembler.\n"); exit(1); } for (section = sectionHead; section; section = section->next) { printf("Disassembly for section %p:\n", section); char pcString[20]; char instructionString[1000]; uint8_t *pc; uint8_t *end; pc = section->start; end = pc + section->size; while (pc < end) { snprintf( pcString, sizeof(pcString), "0x%lx", (unsigned long)(uintptr_t)pc); size_t instructionSize = LLVMDisasmInstruction( disassembler, pc, end - pc, (uintptr_t)pc, instructionString, sizeof(instructionString)); if (!instructionSize) snprintf(instructionString, sizeof(instructionString), ".byte 0x%02x", *pc++); else pc += instructionSize; printf(" %16s: %s\n", pcString, instructionString); } } } return 0; }
/** * Generate the runtime callable function for the whole fragment pipeline. */ static struct lp_fragment_shader_variant * generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader_variant *variant; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef blend_int_vec_type; LLVMTypeRef arg_types[9]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; LLVMValueRef mask_ptr; LLVMValueRef color_ptr; LLVMValueRef depth_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef x0; LLVMValueRef y0; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef blend_in_color[NUM_CHANNELS]; unsigned num_fs; unsigned i; unsigned chan; #ifdef DEBUG tgsi_dump(shader->base.tokens, 0); if(key->depth.enabled) { debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); } if(key->alpha.enabled) { debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value); } if(key->blend.logicop_enable) { debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func); } else if(key->blend.blend_enable) { debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE)); debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE)); debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE)); debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE)); debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE)); debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { if(key->sampler[i].format) { debug_printf("sampler[%u] = \n", i); debug_printf(" .format = %s\n", pf_name(key->sampler[i].format)); debug_printf(" .target = %s\n", debug_dump_tex_target(key->sampler[i].target, TRUE)); debug_printf(" .pot = %u %u %u\n", key->sampler[i].pot_width, key->sampler[i].pot_height, key->sampler[i].pot_depth); debug_printf(" .wrap = %s %s %s\n", debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); debug_printf(" .min_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); debug_printf(" .min_mip_filter = %s\n", debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); debug_printf(" .mag_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); if(key->sampler[i].compare_mode) debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); } } #endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); if(!variant) return NULL; variant->shader = shader; memcpy(&variant->key, key, sizeof *key); /* TODO: actually pick these based on the fs and color buffer * characteristics. */ memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ fs_type.length = 4; /* 4 element per vector */ num_fs = 4; memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ blend_type.width = 8; /* 8-bit ubyte values */ blend_type.length = 16; /* 16 elements per vector */ /* * Generate the function prototype. Any change here must be reflected in * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. */ fs_elem_type = lp_build_elem_type(fs_type); fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); blend_int_vec_type = lp_build_int_vec_type(blend_type); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ arg_types[3] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */ arg_types[7] = LLVMPointerType(blend_vec_type, 0); /* color */ arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); variant->function = LLVMAddFunction(screen->module, "shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant->function, 0); x = LLVMGetParam(variant->function, 1); y = LLVMGetParam(variant->function, 2); a0_ptr = LLVMGetParam(variant->function, 3); dadx_ptr = LLVMGetParam(variant->function, 4); dady_ptr = LLVMGetParam(variant->function, 5); mask_ptr = LLVMGetParam(variant->function, 6); color_ptr = LLVMGetParam(variant->function, 7); depth_ptr = LLVMGetParam(variant->function, 8); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); lp_build_name(y, "y"); lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); lp_build_name(mask_ptr, "mask"); lp_build_name(color_ptr, "color"); lp_build_name(depth_ptr, "depth"); /* * Function body */ block = LLVMAppendBasicBlock(variant->function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); generate_pos0(builder, x, y, &x0, &y0); lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); #if 0 /* C texture sampling */ sampler = lp_c_sampler_soa_create(context_ptr); #else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); #endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[NUM_CHANNELS]; LLVMValueRef depth_ptr_i; if(i != 0) lp_build_interp_soa_update(&interp); fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), ""); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, builder, fs_type, context_ptr, i, &interp, sampler, &fs_mask[i], out_color, depth_ptr_i); for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[chan][i] = out_color[chan]; } sampler->destroy(sampler); /* * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { lp_build_conv(builder, fs_type, blend_type, fs_out_color[chan], num_fs, &blend_in_color[chan], 1); lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]); } lp_build_conv_mask(builder, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); /* * Blending. */ generate_blend(&key->blend, builder, blend_type, context_ptr, blend_mask, blend_in_color, color_ptr); LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); /* * Translate the LLVM IR into machine code. */ if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { LLVMDumpValue(variant->function); abort(); } LLVMRunFunctionPassManager(screen->pass, variant->function); #ifdef DEBUG LLVMDumpValue(variant->function); debug_printf("\n"); #endif variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); #ifdef DEBUG lp_disassemble(variant->jit_function); #endif variant->next = shader->variants; shader->variants = variant; return variant; }
/** * Generate the runtime callable function for the whole fragment pipeline. * Note that the function which we generate operates on a block of 16 * pixels at at time. The block contains 2x2 quads. Each quad contains * 2x2 pixels. */ static void generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, unsigned partial_mask) { struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; char func_name[256]; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef arg_types[11]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMValueRef x; LLVMValueRef y; LLVMValueRef a0_ptr; LLVMValueRef dadx_ptr; LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef depth_ptr; LLVMValueRef mask_input; LLVMValueRef counter = NULL; LLVMBasicBlockRef block; LLVMBuilderRef builder; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; LLVMValueRef blend_mask; LLVMValueRef function; LLVMValueRef facing; unsigned num_fs; unsigned i; unsigned chan; unsigned cbuf; /* TODO: actually pick these based on the fs and color buffer * characteristics. */ memset(&fs_type, 0, sizeof fs_type); fs_type.floating = TRUE; /* floating point values */ fs_type.sign = TRUE; /* values are signed */ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ fs_type.width = 32; /* 32-bit float */ fs_type.length = 4; /* 4 elements per vector */ num_fs = 4; /* number of quads per block */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ blend_type.sign = FALSE; /* values are unsigned */ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ blend_type.width = 8; /* 8-bit ubyte values */ blend_type.length = 16; /* 16 elements per vector */ /* * Generate the function prototype. Any change here must be reflected in * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. */ fs_elem_type = lp_build_elem_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", shader->no, variant->no, partial_mask ? "partial" : "whole"); arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ arg_types[3] = LLVMFloatType(); /* facing */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ arg_types[9] = LLVMInt32Type(); /* mask_input */ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); function = LLVMAddFunction(screen->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); variant->function[partial_mask] = function; /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); context_ptr = LLVMGetParam(function, 0); x = LLVMGetParam(function, 1); y = LLVMGetParam(function, 2); facing = LLVMGetParam(function, 3); a0_ptr = LLVMGetParam(function, 4); dadx_ptr = LLVMGetParam(function, 5); dady_ptr = LLVMGetParam(function, 6); color_ptr_ptr = LLVMGetParam(function, 7); depth_ptr = LLVMGetParam(function, 8); mask_input = LLVMGetParam(function, 9); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); lp_build_name(y, "y"); lp_build_name(a0_ptr, "a0"); lp_build_name(dadx_ptr, "dadx"); lp_build_name(dady_ptr, "dady"); lp_build_name(color_ptr_ptr, "color_ptr_ptr"); lp_build_name(depth_ptr, "depth"); lp_build_name(mask_input, "mask_input"); if (key->occlusion_count) { counter = LLVMGetParam(function, 10); lp_build_name(counter, "counter"); } /* * Function body */ block = LLVMAppendBasicBlock(function, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); /* * The shader input interpolation info is not explicitely baked in the * shader key, but everything it derives from (TGSI, and flatshade) is * already included in the shader key. */ lp_build_interp_soa_init(&interp, lp->num_inputs, lp->inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x, y); /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; if(i != 0) lp_build_interp_soa_update(&interp, i); depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, builder, fs_type, context_ptr, i, &interp, sampler, &fs_mask[i], /* output */ out_color, depth_ptr_i, facing, partial_mask, mask_input, counter); for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) for(chan = 0; chan < NUM_CHANNELS; ++chan) fs_out_color[cbuf][chan][i] = out_color[cbuf][chan]; } sampler->destroy(sampler); /* Loop over color outputs / color buffers to do blending. */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { LLVMValueRef color_ptr; LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0); LLVMValueRef blend_in_color[NUM_CHANNELS]; unsigned rt; /* * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { lp_build_conv(builder, fs_type, blend_type, fs_out_color[cbuf][chan], num_fs, &blend_in_color[chan], 1); lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } if (partial_mask || !variant->opaque) { lp_build_conv_mask(builder, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); } else { blend_mask = lp_build_const_int_vec(blend_type, ~0); } color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), ""); lp_build_name(color_ptr, "color_ptr%d", cbuf); /* which blend/colormask state to use */ rt = key->blend.independent_blend_enable ? cbuf : 0; /* * Blending. */ generate_blend(&key->blend, rt, builder, blend_type, context_ptr, blend_mask, blend_in_color, color_ptr); } #ifdef PIPE_ARCH_X86 /* Avoid corrupting the FPU stack on 32bit OSes. */ lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); #endif LLVMBuildRetVoid(builder); LLVMDisposeBuilder(builder); /* Verify the LLVM IR. If invalid, dump and abort */ #ifdef DEBUG if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { if (1) lp_debug_dump_value(function); abort(); } #endif /* Apply optimizations to LLVM IR */ LLVMRunFunctionPassManager(screen->pass, function); if (gallivm_debug & GALLIVM_DEBUG_IR) { /* Print the LLVM IR to stderr */ lp_debug_dump_value(function); debug_printf("\n"); } /* * Translate the LLVM IR into machine code. */ { void *f = LLVMGetPointerToGlobal(screen->engine, function); variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(f); } lp_func_delete_body(function); } }
PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, struct lp_type src_type, struct lp_type dst_type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; LLVMExecutionEngineRef engine = NULL; LLVMModuleProviderRef provider = NULL; LLVMPassManagerRef pass = NULL; char *error = NULL; conv_test_ptr_t conv_test_ptr; boolean success; const unsigned n = LP_TEST_NUM_SAMPLES; int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned num_srcs; unsigned num_dsts; double eps; unsigned i, j; if(verbose >= 1) dump_conv_types(stdout, src_type, dst_type); if(src_type.length > dst_type.length) { num_srcs = 1; num_dsts = src_type.length/dst_type.length; } else { num_dsts = 1; num_srcs = dst_type.length/src_type.length; } assert(src_type.width * src_type.length == dst_type.width * dst_type.length); /* We must not loose or gain channels. Only precision */ assert(src_type.length * num_srcs == dst_type.length * num_dsts); eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type)); module = LLVMModuleCreateWithName("test"); func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); abort(); } LLVMDisposeMessage(error); provider = LLVMCreateModuleProviderForExistingModule(module); if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { if(verbose < 1) dump_conv_types(stderr, src_type, dst_type); fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); } #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ LLVMAddConstantPropagationPass(pass); LLVMAddInstructionCombiningPass(pass); LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); LLVMRunPassManager(pass, module); #else (void)pass; #endif if(verbose >= 2) LLVMDumpModule(module); conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func); if(verbose >= 2) lp_disassemble(conv_test_ptr); success = TRUE; for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; int64_t end_counter = 0; for(j = 0; j < num_srcs; ++j) { random_vec(src_type, src + j*src_stride); read_vec(src_type, src + j*src_stride, fref + j*src_type.length); } for(j = 0; j < num_dsts; ++j) { write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length); } start_counter = rdtsc(); conv_test_ptr(src, dst); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; for(j = 0; j < num_dsts; ++j) { if(!compare_vec_with_eps(dst_type, dst + j*dst_stride, ref + j*dst_stride, eps)) success = FALSE; } if (!success) { if(verbose < 1) dump_conv_types(stderr, src_type, dst_type); fprintf(stderr, "MISMATCH\n"); for(j = 0; j < num_srcs; ++j) { fprintf(stderr, " Src%u: ", j); dump_vec(stderr, src_type, src + j*src_stride); fprintf(stderr, "\n"); } #if 1 fprintf(stderr, " Ref: "); for(j = 0; j < src_type.length*num_srcs; ++j) fprintf(stderr, " %f", fref[j]); fprintf(stderr, "\n"); #endif for(j = 0; j < num_dsts; ++j) { fprintf(stderr, " Dst%u: ", j); dump_vec(stderr, dst_type, dst + j*dst_stride); fprintf(stderr, "\n"); fprintf(stderr, " Ref%u: ", j); dump_vec(stderr, dst_type, ref + j*dst_stride); fprintf(stderr, "\n"); } } } /* * Unfortunately the output of cycle counter is not very reliable as it comes * -- sometimes we get outliers (due IRQs perhaps?) which are * better removed to avoid random or biased data. */ { double sum = 0.0, sum2 = 0.0; double avg, std; unsigned m; for(i = 0; i < n; ++i) { sum += cycles[i]; sum2 += cycles[i]*cycles[i]; } avg = sum/n; std = sqrtf((sum2 - n*avg*avg)/n); m = 0; sum = 0.0; for(i = 0; i < n; ++i) { if(fabs(cycles[i] - avg) <= 4.0*std) { sum += cycles[i]; ++m; } } cycles_avg = sum/m; } if(fp) write_tsv_row(fp, src_type, dst_type, cycles_avg, success); if (!success) { static boolean firsttime = TRUE; if(firsttime) { if(verbose < 2) LLVMDumpModule(module); LLVMWriteBitcodeToFile(module, "conv.bc"); fprintf(stderr, "conv.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); firsttime = FALSE; /* abort(); */ } } LLVMFreeMachineCodeForFunction(engine, func); LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); return success; }