/*
 * Build an LLVM target machine from the compiler options.
 *
 * The target is looked up from opt->triple. Release builds use the
 * aggressive code generation level, all others use none. Position
 * independent code is selected when either opt->pic or opt->library is set.
 *
 * Returns the new target machine, or NULL after reporting an error through
 * opt->check.errors.
 */
static LLVMTargetMachineRef make_machine(pass_opt_t* opt)
{
  LLVMTargetRef llvm_target;
  char* message;

  if(LLVMGetTargetFromTriple(opt->triple, &llvm_target, &message) != 0)
  {
    errorf(opt->check.errors, NULL, "couldn't create target: %s", message);
    LLVMDisposeMessage(message);
    return NULL;
  }

  LLVMCodeGenOptLevel level;
  LLVMRelocMode reloc_mode;

  if(opt->release)
    level = LLVMCodeGenLevelAggressive;
  else
    level = LLVMCodeGenLevelNone;

  if(opt->pic || opt->library)
    reloc_mode = LLVMRelocPIC;
  else
    reloc_mode = LLVMRelocDefault;

  LLVMTargetMachineRef result = LLVMCreateTargetMachine(llvm_target,
    opt->triple, opt->cpu, opt->features, level, reloc_mode,
    LLVMCodeModelDefault);

  /* A NULL result is reported here and then handed back to the caller. */
  if(result == NULL)
    errorf(opt->check.errors, NULL, "couldn't create target machine");

  return result;
}
LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac_target_machine_options tm_options) { assert(family >= CHIP_TAHITI); char features[256]; const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; LLVMTargetRef target = ac_get_llvm_target(triple); snprintf(features, sizeof(features), "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s", tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "", tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "", tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : ""); LLVMTargetMachineRef tm = LLVMCreateTargetMachine( target, triple, ac_get_llvm_processor_name(family), features, LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); return tm; }
/* triple:string -> ?cpu:string -> ?features:string
   ?level:CodeGenOptLevel.t -> ?reloc_mode:RelocMode.t
   ?code_model:CodeModel.t -> Target.t -> TargetMachine.t

   Each optional argument that equals Val_int(0) is treated as absent and
   replaced by a default: empty strings for the CPU and feature list, and
   the LLVM "Default" enumerator for level, relocation mode and code model.
   Otherwise the payload is read from field 0 of the argument. */
CAMLprim value llvm_create_targetmachine_native(value Triple, value CPU,
                                                value Features, value OptLevel,
                                                value RelocMode,
                                                value CodeModel,
                                                LLVMTargetRef Target) {
  const char *CPUStr =
      (CPU == Val_int(0)) ? "" : String_val(Field(CPU, 0));
  const char *FeaturesStr =
      (Features == Val_int(0)) ? "" : String_val(Field(Features, 0));

  LLVMCodeGenOptLevel OptLevelEnum = LLVMCodeGenLevelDefault;
  LLVMRelocMode RelocModeEnum = LLVMRelocDefault;
  LLVMCodeModel CodeModelEnum = LLVMCodeModelDefault;

  if (OptLevel != Val_int(0)) {
    OptLevelEnum = Int_val(Field(OptLevel, 0));
  }
  if (RelocMode != Val_int(0)) {
    RelocModeEnum = Int_val(Field(RelocMode, 0));
  }
  if (CodeModel != Val_int(0)) {
    CodeModelEnum = Int_val(Field(CodeModel, 0));
  }

  LLVMTargetMachineRef Machine =
      LLVMCreateTargetMachine(Target, String_val(Triple), CPUStr, FeaturesStr,
                              OptLevelEnum, RelocModeEnum, CodeModelEnum);

  return llvm_alloc_targetmachine(Machine);
}
static LLVMTargetMachineRef si_create_llvm_target_machine(struct si_screen *sscreen) { const char *triple = "amdgcn--"; return LLVMCreateTargetMachine(radeon_llvm_get_r600_target(triple), triple, r600_get_llvm_processor_name(sscreen->b.family), #if HAVE_LLVM >= 0x0308 sscreen->b.debug_flags & DBG_SI_SCHED ? SI_LLVM_DEFAULT_FEATURES ",+si-scheduler" : #endif SI_LLVM_DEFAULT_FEATURES, LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); }
LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family) { assert(family >= CHIP_TAHITI); const char *triple = "amdgcn--"; LLVMTargetRef target = ac_get_llvm_target(triple); LLVMTargetMachineRef tm = LLVMCreateTargetMachine( target, triple, ac_get_llvm_processor_name(family), "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); return tm; }
/**
 * Compile an LLVM module to machine code.
 *
 * @param M          module to compile
 * @param binary     filled in by radeon_elf_read() from the emitted object
 * @param gpu_family processor name; only read when \p tm is NULL
 * @param dump       when non-zero, the module is dumped and "+DumpCode" is
 *                   requested on a locally created target machine
 * @param tm         target machine to use, or NULL to create a temporary
 *                   "r600--" machine for the duration of this call
 *
 * A target machine passed in by the caller is never disposed here; only a
 * machine created by this function is.
 *
 * @returns 0 for success, 1 for failure
 */
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
			     const char *gpu_family, unsigned dump,
			     LLVMTargetMachineRef tm)
{
	char cpu[CPU_STRING_LEN];
	char fs[FS_STRING_LEN];
	char *err;
	bool dispose_tm = false;
	LLVMContextRef llvm_ctx;
	unsigned rval = 0;
	LLVMMemoryBufferRef out_buffer = NULL;
	unsigned buffer_size;
	const char *buffer_data;
	char triple[TRIPLE_STRING_LEN];
	LLVMBool mem_err;

	if (!tm) {
		LLVMTargetRef target;

		strncpy(triple, "r600--", TRIPLE_STRING_LEN);
		target = radeon_llvm_get_r600_target(triple);
		if (!target) {
			return 1;
		}
		/* strncpy() does not terminate on truncation. */
		strncpy(cpu, gpu_family, CPU_STRING_LEN);
		cpu[CPU_STRING_LEN - 1] = '\0';
		memset(fs, 0, sizeof(fs));
		if (dump) {
			strncpy(fs, "+DumpCode", FS_STRING_LEN);
		}
		tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
					     LLVMCodeGenLevelDefault,
					     LLVMRelocDefault,
					     LLVMCodeModelDefault);
		if (!tm) {
			fprintf(stderr, "%s: failed to create target machine\n",
				__FUNCTION__);
			return 1;
		}
		dispose_tm = true;
	}

	if (dump) {
		LLVMDumpModule(M);
	}

	/* Setup Diagnostic Handler*/
	llvm_ctx = LLVMGetModuleContext(M);

#if HAVE_LLVM >= 0x0305
	/* The handler receives a pointer to rval so it can flag failures. */
	LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
#endif
	rval = 0;

	/* Compile IR*/
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
						      &err, &out_buffer);

	/* Process Errors/Warnings */
	if (mem_err) {
		fprintf(stderr, "%s: %s", __FUNCTION__, err);
		FREE(err);
		rval = 1;
		goto out;
	}

	if (0 != rval) {
		fprintf(stderr, "%s: Processing Diag Flag\n", __FUNCTION__);
	}

	/* Extract Shader Code*/
	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	radeon_elf_read(buffer_data, buffer_size, binary, dump);

out:
	/* Clean up. out_buffer may have been set even if emission failed. */
	if (out_buffer) {
		LLVMDisposeMemoryBuffer(out_buffer);
	}
	/* Never dispose a target machine owned by the caller. */
	if (dispose_tm) {
		LLVMDisposeTargetMachine(tm);
	}
	return rval;
}
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->b.ws; LLVMTargetRef r600_target; const char *triple = "amdgcn--"; int shader, i; if (!sctx) return NULL; if (sscreen->b.debug_flags & DBG_CHECK_VM) flags |= PIPE_CONTEXT_DEBUG; sctx->b.b.screen = screen; /* this must be set first */ sctx->b.b.priv = priv; sctx->b.b.destroy = si_destroy_context; sctx->b.set_atom_dirty = (void *)si_set_atom_dirty; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; if (!r600_common_context_init(&sctx->b, &sscreen->b)) goto fail; if (sscreen->b.info.drm_major == 3) sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status; si_init_blit_functions(sctx); si_init_compute_functions(sctx); si_init_cp_dma_functions(sctx); si_init_debug_functions(sctx); if (sscreen->b.info.has_uvd) { sctx->b.b.create_video_codec = si_uvd_create_decoder; sctx->b.b.create_video_buffer = si_video_buffer_create; } else { sctx->b.b.create_video_codec = vl_create_decoder; sctx->b.b.create_video_buffer = vl_video_buffer_create; } sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, sctx); if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) { sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs); if (!sctx->ce_ib) goto fail; if (ws->cs_add_const_preamble_ib) { sctx->ce_preamble_ib = ws->cs_add_const_preamble_ib(sctx->b.gfx.cs); if (!sctx->ce_preamble_ib) goto fail; } sctx->ce_suballocator = u_suballocator_create(&sctx->b.b, 1024 * 1024, 64, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, FALSE); if (!sctx->ce_suballocator) goto fail; } sctx->b.gfx.flush = si_context_gfx_flush; /* Border colors. 
*/ sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table)); if (!sctx->border_color_table) goto fail; sctx->border_color_buffer = (struct r600_resource*) pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table)); if (!sctx->border_color_buffer) goto fail; sctx->border_color_map = ws->buffer_map(sctx->border_color_buffer->buf, NULL, PIPE_TRANSFER_WRITE); if (!sctx->border_color_map) goto fail; si_init_all_descriptors(sctx); si_init_state_functions(sctx); si_init_shader_functions(sctx); if (sctx->b.chip_class >= CIK) cik_init_sdma_functions(sctx); else si_init_dma_functions(sctx); if (sscreen->b.debug_flags & DBG_FORCE_DMA) sctx->b.b.resource_copy_region = sctx->b.dma_copy; sctx->blitter = util_blitter_create(&sctx->b.b); if (sctx->blitter == NULL) goto fail; sctx->blitter->draw_rectangle = r600_draw_rectangle; sctx->sample_mask.sample_mask = 0xffff; /* these must be last */ si_begin_new_cs(sctx); r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ if (sctx->b.chip_class == CIK) { sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT, 16); if (!sctx->null_const_buf.buffer) goto fail; sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; for (shader = 0; shader < SI_NUM_SHADERS; shader++) { for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) { sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i, &sctx->null_const_buf); } } /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, 0, R600_COHERENCY_SHADER); } /* XXX: This is the maximum value allowed. I'm not sure how to compute * this for non-cs shaders. 
Using the wrong value here can result in * GPU lockups, but the maximum value seems to always work. */ sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units; /* Initialize LLVM TargetMachine */ r600_target = radeon_llvm_get_r600_target(triple); sctx->tm = LLVMCreateTargetMachine(r600_target, triple, r600_get_llvm_processor_name(sscreen->b.family), #if HAVE_LLVM >= 0x0308 sscreen->b.debug_flags & DBG_SI_SCHED ? "+DumpCode,+vgpr-spilling,+si-scheduler" : #endif "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); return &sctx->b.b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); si_destroy_context(&sctx->b.b); return NULL; }
/**
 * Compile an LLVM module to machine code.
 *
 * The module is emitted as an object file for the "r600--" triple and the
 * resulting ELF image is scanned section by section:
 *  - ".text" is copied into binary->code / binary->code_size,
 *  - ".AMDGPU.config" is copied into binary->config / binary->config_size,
 *  - ".AMDGPU.disasm" is printed to stderr when \p dump is non-zero.
 *
 * @param M          module to compile
 * @param binary     receives copies of the code and config sections; any
 *                   buffers already stored in it when a later step fails
 *                   are left in place
 * @param gpu_family processor name passed to LLVM
 * @param dump       when non-zero, dump the module, request "+DumpCode" and
 *                   print the disassembly section
 *
 * All temporary resources (target machine, LLVM output buffer, ELF handle
 * and the private copy of the ELF image) are released on every path.
 *
 * @returns 0 for success, 1 for failure
 */
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_llvm_binary *binary,
			     const char * gpu_family, unsigned dump)
{
	LLVMTargetRef target;
	LLVMTargetMachineRef tm;
	char cpu[CPU_STRING_LEN];
	char fs[FS_STRING_LEN];
	char *err;
	LLVMMemoryBufferRef out_buffer = NULL;
	unsigned buffer_size;
	const char *buffer_data;
	char triple[TRIPLE_STRING_LEN];
	char *elf_buffer = NULL;
	Elf *elf = NULL;
	Elf_Scn *section = NULL;
	size_t section_str_index;
	LLVMBool r;
	unsigned rval = 1;

	init_r600_target();

	target = get_r600_target();
	if (!target) {
		return 1;
	}

	/* strncpy() does not terminate on truncation. */
	strncpy(cpu, gpu_family, CPU_STRING_LEN);
	cpu[CPU_STRING_LEN - 1] = '\0';
	memset(fs, 0, sizeof(fs));
	if (dump) {
		LLVMDumpModule(M);
		strncpy(fs, "+DumpCode", FS_STRING_LEN);
	}
	strncpy(triple, "r600--", TRIPLE_STRING_LEN);

	tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
				     LLVMCodeGenLevelDefault, LLVMRelocDefault,
				     LLVMCodeModelDefault);
	if (!tm) {
		fprintf(stderr, "Failed to create target machine\n");
		return 1;
	}

	r = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
						&out_buffer);
	if (r) {
		fprintf(stderr, "%s", err);
		FREE(err);
		goto out;
	}

	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	/* One of the libelf implementations
	 * (http://www.mr511.de/software/english.htm) requires calling
	 * elf_version() before elf_memory().
	 */
	elf_version(EV_CURRENT);

	/* libelf works on a private, writable copy of the object image. */
	elf_buffer = MALLOC(buffer_size);
	if (!elf_buffer) {
		goto out;
	}
	memcpy(elf_buffer, buffer_data, buffer_size);

	elf = elf_memory(elf_buffer, buffer_size);
	if (!elf || elf_getshdrstrndx(elf, &section_str_index) != 0) {
		fprintf(stderr, "Failed to read ELF header\n");
		goto out;
	}

	binary->disassembled = 0;

	while ((section = elf_nextscn(elf, section))) {
		const char *name;
		Elf_Data *section_data = NULL;
		GElf_Shdr section_header;

		if (gelf_getshdr(section, &section_header) != &section_header) {
			fprintf(stderr, "Failed to read ELF section header\n");
			goto out;
		}

		name = elf_strptr(elf, section_str_index, section_header.sh_name);
		if (!name) {
			continue;
		}

		if (!strcmp(name, ".text")) {
			section_data = elf_getdata(section, section_data);
			if (!section_data) {
				fprintf(stderr, "Failed to read ELF section data\n");
				goto out;
			}
			binary->code_size = section_data->d_size;
			binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
			if (binary->code_size) {
				if (!binary->code) {
					goto out;
				}
				memcpy(binary->code, section_data->d_buf,
				       binary->code_size);
			}
		} else if (!strcmp(name, ".AMDGPU.config")) {
			section_data = elf_getdata(section, section_data);
			if (!section_data) {
				fprintf(stderr, "Failed to read ELF section data\n");
				goto out;
			}
			binary->config_size = section_data->d_size;
			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
			if (binary->config_size) {
				if (!binary->config) {
					goto out;
				}
				memcpy(binary->config, section_data->d_buf,
				       binary->config_size);
			}
		} else if (dump && !strcmp(name, ".AMDGPU.disasm")) {
			section_data = elf_getdata(section, section_data);
			if (!section_data) {
				fprintf(stderr, "Failed to read ELF section data\n");
				goto out;
			}
			binary->disassembled = 1;
			fprintf(stderr, "\nShader Disassembly:\n\n");
			fprintf(stderr, "%.*s\n", (int)section_data->d_size,
				(char *)section_data->d_buf);
		}
	}

	rval = 0;

out:
	/* The section data was copied, so the ELF handle and image can go. */
	if (elf) {
		elf_end(elf);
	}
	FREE(elf_buffer);
	/* out_buffer may have been set even if emission failed. */
	if (out_buffer) {
		LLVMDisposeMemoryBuffer(out_buffer);
	}
	LLVMDisposeTargetMachine(tm);
	return rval;
}
/* * machine_init * * Initializes the machine context. */ machinedef_t * machine_init (const char *mspec) { machine_ctx_t m; machinedef_t *mach; char *err; LLVMTargetRef target; char *machspec = (char *) mspec; unsigned long allosize; if (machspec == 0) { machspec = LLVM_DEFAULT_TARGET_TRIPLE; } allosize = (sizeof(struct machine_ctx_s) + sizeof(struct machinedef_s) + strlen(machspec) + 1); m = malloc(allosize); if (m == 0) return 0; memset(m, 0, allosize); m->triple = ((char *) m) + (sizeof(struct machine_ctx_s) + sizeof(struct machinedef_s)); memcpy(m->triple, machspec, strlen(machspec)); LLVM_NATIVE_TARGETINFO(); LLVM_NATIVE_TARGET(); LLVM_NATIVE_TARGETMC(); LLVM_NATIVE_ASMPRINTER(); LLVM_NATIVE_ASMPARSER(); err = 0; target = HelperLookupTarget(machspec, &err); if (target == 0) { if (err != 0) free(err); free(m); return 0; } m->target_machine = LLVMCreateTargetMachine(target, (char *)machspec, "", "", LLVMCodeGenLevelDefault, LLVMRelocPIC, LLVMCodeModelDefault); if (m->target_machine == 0) { free(m); return 0; } HelperSetAsmVerbosity(m->target_machine, 1); m->llvmctx = LLVMContextCreate(); if (m->llvmctx == 0) { LLVMDisposeTargetMachine(m->target_machine); free(m); return 0; } m->is_macho = (strstr(machspec, "darwin") != 0); // XXX mach = (machinedef_t *)(m + 1); mach->machctx = m; mach->bpaddr = sizeof(int *) * 8; mach->bpval = sizeof(long) * 8; mach->bpunit = 8; mach->charsize_count = 1; mach->charsizes[0] = 8; mach->flags = MACH_M_SIGNEXT | MACH_M_LTC_INIT; mach->max_align = 4; mach->reg_count = 16; return mach; } /* machine_init */
/**
 * Compile an LLVM module to machine code.
 *
 * @param M          module to compile
 * @param binary     filled in by radeon_elf_read() from the emitted object
 * @param gpu_family processor name; only read when \p tm is NULL
 * @param tm         target machine to use, or NULL to create a temporary
 *                   "r600--" machine with "+DumpCode" for this call
 * @param debug      callback that receives emit and compile failure messages
 *
 * A target machine passed in by the caller is never disposed here; only a
 * machine created by this function is.
 *
 * @returns 0 for success, 1 for failure
 */
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
			     const char *gpu_family,
			     LLVMTargetMachineRef tm,
			     struct pipe_debug_callback *debug)
{
	struct radeon_llvm_diagnostics diag;
	char cpu[CPU_STRING_LEN];
	char fs[FS_STRING_LEN];
	char *err;
	bool dispose_tm = false;
	LLVMContextRef llvm_ctx;
	LLVMMemoryBufferRef out_buffer = NULL;
	unsigned buffer_size;
	const char *buffer_data;
	char triple[TRIPLE_STRING_LEN];
	LLVMBool mem_err;

	diag.debug = debug;
	diag.retval = 0;

	if (!tm) {
		LLVMTargetRef target;

		strncpy(triple, "r600--", TRIPLE_STRING_LEN);
		target = radeon_llvm_get_r600_target(triple);
		if (!target) {
			return 1;
		}
		/* strncpy() does not terminate on truncation. */
		strncpy(cpu, gpu_family, CPU_STRING_LEN);
		cpu[CPU_STRING_LEN - 1] = '\0';
		memset(fs, 0, sizeof(fs));
		strncpy(fs, "+DumpCode", FS_STRING_LEN);
		tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
					     LLVMCodeGenLevelDefault,
					     LLVMRelocDefault,
					     LLVMCodeModelDefault);
		if (!tm) {
			fprintf(stderr, "%s: failed to create target machine\n",
				__FUNCTION__);
			return 1;
		}
		dispose_tm = true;
	}

	/* Setup Diagnostic Handler*/
	llvm_ctx = LLVMGetModuleContext(M);

	LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &diag);

	/* Compile IR*/
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
						      &err, &out_buffer);

	/* Process Errors/Warnings */
	if (mem_err) {
		fprintf(stderr, "%s: %s", __FUNCTION__, err);
		pipe_debug_message(debug, SHADER_INFO,
				   "LLVM emit error: %s", err);
		FREE(err);
		diag.retval = 1;
		goto out;
	}

	/* Extract Shader Code*/
	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	radeon_elf_read(buffer_data, buffer_size, binary);

out:
	/* Clean up. out_buffer may have been set even if emission failed. */
	if (out_buffer) {
		LLVMDisposeMemoryBuffer(out_buffer);
	}
	if (dispose_tm) {
		LLVMDisposeTargetMachine(tm);
	}
	if (diag.retval != 0)
		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
	return diag.retval;
}
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv) { struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->b.ws; LLVMTargetRef r600_target; #if HAVE_LLVM >= 0x0306 const char *triple = "amdgcn--"; #endif int shader, i; if (sctx == NULL) return NULL; sctx->b.b.screen = screen; /* this must be set first */ sctx->b.b.priv = priv; sctx->b.b.destroy = si_destroy_context; sctx->b.set_atom_dirty = (void *)si_set_atom_dirty; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ if (!r600_common_context_init(&sctx->b, &sscreen->b)) goto fail; if (sscreen->b.info.drm_major == 3) sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status; si_init_blit_functions(sctx); si_init_compute_functions(sctx); si_init_cp_dma_functions(sctx); if (sscreen->b.info.has_uvd) { sctx->b.b.create_video_codec = si_uvd_create_decoder; sctx->b.b.create_video_buffer = si_video_buffer_create; } else { sctx->b.b.create_video_codec = vl_create_decoder; sctx->b.b.create_video_buffer = vl_video_buffer_create; } sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, sctx, sscreen->b.trace_bo ? sscreen->b.trace_bo->cs_buf : NULL); sctx->b.rings.gfx.flush = si_context_gfx_flush; si_init_all_descriptors(sctx); /* Initialize cache_flush. 
*/ sctx->cache_flush = si_atom_cache_flush; sctx->atoms.s.cache_flush = &sctx->cache_flush; sctx->msaa_sample_locs = si_atom_msaa_sample_locs; sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs; sctx->msaa_config = si_atom_msaa_config; sctx->atoms.s.msaa_config = &sctx->msaa_config; sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom; sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom; si_init_state_functions(sctx); si_init_shader_functions(sctx); if (sscreen->b.debug_flags & DBG_FORCE_DMA) sctx->b.b.resource_copy_region = sctx->b.dma_copy; sctx->blitter = util_blitter_create(&sctx->b.b); if (sctx->blitter == NULL) goto fail; sctx->blitter->draw_rectangle = r600_draw_rectangle; /* these must be last */ si_begin_new_cs(sctx); r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ if (sctx->b.chip_class == CIK) { sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT, 16); if (!sctx->null_const_buf.buffer) goto fail; sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; for (shader = 0; shader < SI_NUM_SHADERS; shader++) { for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) { sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i, &sctx->null_const_buf); } } /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, 0, false); } /* XXX: This is the maximum value allowed. I'm not sure how to compute * this for non-cs shaders. Using the wrong value here can result in * GPU lockups, but the maximum value seems to always work. 
*/ sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units; #if HAVE_LLVM >= 0x0306 /* Initialize LLVM TargetMachine */ r600_target = radeon_llvm_get_r600_target(triple); sctx->tm = LLVMCreateTargetMachine(r600_target, triple, r600_get_llvm_processor_name(sscreen->b.family), "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); #endif return &sctx->b.b; fail: si_destroy_context(&sctx->b.b); return NULL; }