static void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso) { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); #ifdef HAVE_OPENCL const struct pipe_llvm_program_header *header; const char *code; void *p; boolean use_kill; COMPUTE_DBG(rctx->screen, "*** evergreen_create_compute_state\n"); header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); radeon_shader_binary_init(&shader->binary); radeon_elf_read(code, header->num_bytes, &shader->binary); r600_create_shader(&shader->bc, &shader->binary, &use_kill); /* Upload code + ROdata */ shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen, shader->bc.ndw * 4); p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); //TODO: use util_memcpy_cpu_to_le32 ? memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); rctx->b.ws->buffer_unmap(shader->code_bo->buf); #endif shader->ctx = rctx; shader->local_size = cso->req_local_mem; shader->private_size = cso->req_private_mem; shader->input_size = cso->req_input_mem; return shader; }
void *evergreen_create_compute_state( struct pipe_context *ctx_, const const struct pipe_compute_state *cso) { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); #ifdef HAVE_OPENCL const struct pipe_llvm_program_header * header; const char *code; void *p; boolean use_kill; COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n"); header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); #if HAVE_LLVM < 0x0306 (void)use_kill; (void)p; shader->llvm_ctx = LLVMContextCreate(); shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code, header->num_bytes); shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels); { unsigned i; for (i = 0; i < shader->num_kernels; i++) { struct r600_kernel *kernel = &shader->kernels[i]; kernel->llvm_module = radeon_llvm_get_kernel_module( shader->llvm_ctx, i, code, header->num_bytes); } } #else radeon_shader_binary_init(&shader->binary); radeon_elf_read(code, header->num_bytes, &shader->binary); r600_create_shader(&shader->bc, &shader->binary, &use_kill); shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, shader->bc.ndw * 4); p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); ctx->b.ws->buffer_unmap(shader->code_bo->buf); #endif #endif shader->ctx = ctx; shader->local_size = cso->req_local_mem; shader->private_size = cso->req_private_mem; shader->input_size = cso->req_input_mem; return shader; }
static void *si_create_compute_state( struct pipe_context *ctx, const struct pipe_compute_state *cso) { struct si_context *sctx = (struct si_context *)ctx; struct si_screen *sscreen = (struct si_screen *)ctx->screen; struct si_compute *program = CALLOC_STRUCT(si_compute); struct si_shader *shader = &program->shader; program->ir_type = cso->ir_type; program->local_size = cso->req_local_mem; program->private_size = cso->req_private_mem; program->input_size = cso->req_input_mem; program->use_code_object_v2 = HAVE_LLVM >= 0x0400 && cso->ir_type == PIPE_SHADER_IR_NATIVE; if (cso->ir_type == PIPE_SHADER_IR_TGSI) { struct si_shader_selector sel; bool scratch_enabled; memset(&sel, 0, sizeof(sel)); sel.tokens = tgsi_dup_tokens(cso->prog); if (!sel.tokens) { FREE(program); return NULL; } tgsi_scan_shader(cso->prog, &sel.info); sel.type = PIPE_SHADER_COMPUTE; sel.local_size = cso->req_local_mem; p_atomic_inc(&sscreen->b.num_shaders_created); program->shader.selector = &sel; if (si_shader_create(sscreen, sctx->tm, &program->shader, &sctx->b.debug)) { FREE(sel.tokens); FREE(program); return NULL; } scratch_enabled = shader->config.scratch_bytes_per_wave > 0; shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(shader->config.float_mode); shader->config.rsrc2 = S_00B84C_USER_SGPR(SI_CS_NUM_USER_SGPR) | S_00B84C_SCRATCH_EN(scratch_enabled) | S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) | S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) | S_00B84C_LDS_SIZE(shader->config.lds_size); FREE(sel.tokens); } else { const struct pipe_llvm_program_header *header; const char *code; header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); radeon_elf_read(code, header->num_bytes, &program->shader.binary); if (program->use_code_object_v2) { const amd_kernel_code_t *code_object = si_compute_get_code_object(program, 0); code_object_to_config(code_object, &program->shader.config); } else { si_shader_binary_read_config(&program->shader.binary, &program->shader.config, 0); } si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug, PIPE_SHADER_COMPUTE, stderr); si_shader_binary_upload(sctx->screen, &program->shader); } return program; }
/** * Compile an LLVM module to machine code. * * @returns 0 for success, 1 for failure */ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm) { char cpu[CPU_STRING_LEN]; char fs[FS_STRING_LEN]; char *err; bool dispose_tm = false; LLVMContextRef llvm_ctx; unsigned rval = 0; LLVMMemoryBufferRef out_buffer; unsigned buffer_size; const char *buffer_data; char triple[TRIPLE_STRING_LEN]; LLVMBool mem_err; if (!tm) { strncpy(triple, "r600--", TRIPLE_STRING_LEN); LLVMTargetRef target = radeon_llvm_get_r600_target(triple); if (!target) { return 1; } strncpy(cpu, gpu_family, CPU_STRING_LEN); memset(fs, 0, sizeof(fs)); if (dump) { strncpy(fs, "+DumpCode", FS_STRING_LEN); } tm = LLVMCreateTargetMachine(target, triple, cpu, fs, LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); dispose_tm = true; } if (dump) { LLVMDumpModule(M); } /* Setup Diagnostic Handler*/ llvm_ctx = LLVMGetModuleContext(M); #if HAVE_LLVM >= 0x0305 LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval); #endif rval = 0; /* Compile IR*/ mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err, &out_buffer); /* Process Errors/Warnings */ if (mem_err) { fprintf(stderr, "%s: %s", __FUNCTION__, err); FREE(err); LLVMDisposeTargetMachine(tm); return 1; } if (0 != rval) { fprintf(stderr, "%s: Processing Diag Flag\n", __FUNCTION__); } /* Extract Shader Code*/ buffer_size = LLVMGetBufferSize(out_buffer); buffer_data = LLVMGetBufferStart(out_buffer); radeon_elf_read(buffer_data, buffer_size, binary, dump); /* Clean up */ LLVMDisposeMemoryBuffer(out_buffer); if (dispose_tm) { LLVMDisposeTargetMachine(tm); } return rval; }
/** * Shader compiles can be overridden with arbitrary ELF objects by setting * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2] */ bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary) { const char *p = debug_get_option_replace_shaders(); const char *semicolon; char *copy = NULL; FILE *f; long filesize, nread; char *buf = NULL; bool replaced = false; if (!p) return false; while (*p) { unsigned long i; char *endp; i = strtoul(p, &endp, 0); p = endp; if (*p != ':') { fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n"); exit(1); } ++p; if (i == num) break; p = strchr(p, ';'); if (!p) return false; ++p; } if (!*p) return false; semicolon = strchr(p, ';'); if (semicolon) { p = copy = strndup(p, semicolon - p); if (!copy) { fprintf(stderr, "out of memory\n"); return false; } } fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p); f = fopen(p, "r"); if (!f) { perror("radeonsi: failed to open file"); goto out_free; } if (fseek(f, 0, SEEK_END) != 0) goto file_error; filesize = ftell(f); if (filesize < 0) goto file_error; if (fseek(f, 0, SEEK_SET) != 0) goto file_error; buf = MALLOC(filesize); if (!buf) { fprintf(stderr, "out of memory\n"); goto out_close; } nread = fread(buf, 1, filesize, f); if (nread != filesize) goto file_error; radeon_elf_read(buf, filesize, binary); replaced = true; out_close: fclose(f); out_free: FREE(buf); free(copy); return replaced; file_error: perror("radeonsi: reading shader"); goto out_close; }
/** * Compile an LLVM module to machine code. * * @returns 0 for success, 1 for failure */ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, const char *gpu_family, LLVMTargetMachineRef tm, struct pipe_debug_callback *debug) { struct radeon_llvm_diagnostics diag; char cpu[CPU_STRING_LEN]; char fs[FS_STRING_LEN]; char *err; bool dispose_tm = false; LLVMContextRef llvm_ctx; LLVMMemoryBufferRef out_buffer; unsigned buffer_size; const char *buffer_data; char triple[TRIPLE_STRING_LEN]; LLVMBool mem_err; diag.debug = debug; diag.retval = 0; if (!tm) { strncpy(triple, "r600--", TRIPLE_STRING_LEN); LLVMTargetRef target = radeon_llvm_get_r600_target(triple); if (!target) { return 1; } strncpy(cpu, gpu_family, CPU_STRING_LEN); memset(fs, 0, sizeof(fs)); strncpy(fs, "+DumpCode", FS_STRING_LEN); tm = LLVMCreateTargetMachine(target, triple, cpu, fs, LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); dispose_tm = true; } /* Setup Diagnostic Handler*/ llvm_ctx = LLVMGetModuleContext(M); LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &diag); /* Compile IR*/ mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err, &out_buffer); /* Process Errors/Warnings */ if (mem_err) { fprintf(stderr, "%s: %s", __FUNCTION__, err); pipe_debug_message(debug, SHADER_INFO, "LLVM emit error: %s", err); FREE(err); diag.retval = 1; goto out; } /* Extract Shader Code*/ buffer_size = LLVMGetBufferSize(out_buffer); buffer_data = LLVMGetBufferStart(out_buffer); radeon_elf_read(buffer_data, buffer_size, binary); /* Clean up */ LLVMDisposeMemoryBuffer(out_buffer); out: if (dispose_tm) { LLVMDisposeTargetMachine(tm); } if (diag.retval != 0) pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed"); return diag.retval; }