static void si_handle_env_var_force_family(struct si_screen *sscreen) { const char *family = debug_get_option("SI_FORCE_FAMILY", NULL); unsigned i; if (!family) return; for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { if (!strcmp(family, r600_get_llvm_processor_name(i))) { /* Override family and chip_class. */ sscreen->b.family = sscreen->b.info.family = i; if (i >= CHIP_TONGA) sscreen->b.chip_class = sscreen->b.info.chip_class = VI; else if (i >= CHIP_BONAIRE) sscreen->b.chip_class = sscreen->b.info.chip_class = CIK; else sscreen->b.chip_class = sscreen->b.info.chip_class = SI; /* Don't submit any IBs. */ setenv("RADEON_NOOP", "1", 1); return; } } fprintf(stderr, "radeonsi: Unknown family: %s\n", family); exit(1); }
static LLVMTargetMachineRef si_create_llvm_target_machine(struct si_screen *sscreen) { const char *triple = "amdgcn--"; return LLVMCreateTargetMachine(radeon_llvm_get_r600_target(triple), triple, r600_get_llvm_processor_name(sscreen->b.family), #if HAVE_LLVM >= 0x0308 sscreen->b.debug_flags & DBG_SI_SCHED ? SI_LLVM_DEFAULT_FEATURES ",+si-scheduler" : #endif SI_LLVM_DEFAULT_FEATURES, LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); }
/**
 * Compile an LLVM module for the given family and load the result into a
 * r600 bytecode object.
 *
 * \param mod       LLVM module to compile
 * \param family    GPU family; selects the LLVM processor name
 * \param bc        bytecode object filled in by r600_create_shader()
 * \param use_kill  forwarded to r600_create_shader()
 * \param dump      forwarded (twice) to radeon_llvm_compile()
 * \return 0 on success; otherwise the non-zero status of whichever step
 *         failed (radeon_llvm_compile() or r600_create_shader())
 */
unsigned r600_llvm_compile(
	LLVMModuleRef mod,
	enum radeon_family family,
	struct r600_bytecode *bc,
	boolean *use_kill,
	unsigned dump)
{
	unsigned r;
	struct radeon_shader_binary binary;
	const char *gpu_family = r600_get_llvm_processor_name(family);

	memset(&binary, 0, sizeof(binary));

	r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);

	/* Only build the shader from a successfully compiled binary.  The
	 * compile status used to be overwritten unconditionally here, which
	 * hid compilation failures from the caller. */
	if (r == 0)
		r = r600_create_shader(bc, &binary, use_kill);

	/* The binary was zero-initialised above, so these are released on
	 * both the success and the failure path. */
	FREE(binary.code);
	FREE(binary.config);
	FREE(binary.rodata);
	FREE(binary.global_symbol_offsets);

	return r;
}
/**
 * Compile an LLVM module for the given family and load the result into a
 * r600 bytecode object.
 *
 * \param mod       LLVM module to compile
 * \param family    GPU family; selects the LLVM processor name
 * \param bc        bytecode object filled in by r600_create_shader()
 * \param use_kill  forwarded to r600_create_shader()
 * \param dump      when non-zero, the module is dumped with
 *                  LLVMDumpModule() before compilation
 * \param debug     debug callback forwarded to radeon_llvm_compile()
 * \return 0 on success; otherwise the non-zero status of whichever step
 *         failed (radeon_llvm_compile() or r600_create_shader())
 */
unsigned r600_llvm_compile(
	LLVMModuleRef mod,
	enum radeon_family family,
	struct r600_bytecode *bc,
	boolean *use_kill,
	unsigned dump,
	struct pipe_debug_callback *debug)
{
	unsigned r;
	struct radeon_shader_binary binary;
	const char *gpu_family = r600_get_llvm_processor_name(family);

	radeon_shader_binary_init(&binary);

	if (dump)
		LLVMDumpModule(mod);

	r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug);

	/* Only build the shader from a successfully compiled binary.  The
	 * compile status used to be overwritten unconditionally here, which
	 * hid compilation failures from the caller. */
	if (r == 0)
		r = r600_create_shader(bc, &binary, use_kill);

	/* Released on both the success and the failure path. */
	radeon_shader_binary_clean(&binary);

	return r;
}
static int r600_get_compute_param(struct pipe_screen *screen, enum pipe_compute_cap param, void *ret) { struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; //TODO: select these params by asic switch (param) { case PIPE_COMPUTE_CAP_IR_TARGET: { const char *gpu; const char *triple; if (rscreen->family <= CHIP_ARUBA || HAVE_LLVM < 0x0306) { triple = "r600--"; } else { triple = "amdgcn--"; } switch(rscreen->family) { /* Clang < 3.6 is missing Hainan in its list of * GPUs, so we need to use the name of a similar GPU. */ #if HAVE_LLVM < 0x0306 case CHIP_HAINAN: gpu = "oland"; break; #endif default: gpu = r600_get_llvm_processor_name(rscreen->family); break; } if (ret) { sprintf(ret, "%s-%s", gpu, triple); } /* +2 for dash and terminating NIL byte */ return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); } case PIPE_COMPUTE_CAP_GRID_DIMENSION: if (ret) { uint64_t *grid_dimension = ret; grid_dimension[0] = 3; } return 1 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: if (ret) { uint64_t *grid_size = ret; grid_size[0] = 65535; grid_size[1] = 65535; grid_size[2] = 1; } return 3 * sizeof(uint64_t) ; case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: if (ret) { uint64_t *block_size = ret; block_size[0] = 256; block_size[1] = 256; block_size[2] = 256; } return 3 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: if (ret) { uint64_t *max_threads_per_block = ret; *max_threads_per_block = 256; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: if (ret) { uint64_t *max_global_size = ret; uint64_t max_mem_alloc_size; r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_mem_alloc_size); /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least * 1/4 of the MAX_GLOBAL_SIZE. Since the * MAX_MEM_ALLOC_SIZE is fixed for older kernels, * make sure we never report more than * 4 * MAX_MEM_ALLOC_SIZE. 
*/ *max_global_size = MIN2(4 * max_mem_alloc_size, rscreen->info.gart_size + rscreen->info.vram_size); } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: if (ret) { uint64_t *max_local_size = ret; /* Value reported by the closed source driver. */ *max_local_size = 32768; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: if (ret) { uint64_t *max_input_size = ret; /* Value reported by the closed source driver. */ *max_input_size = 1024; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: if (ret) { uint64_t *max_mem_alloc_size = ret; /* XXX: The limit in older kernels is 256 MB. We * should add a query here for newer kernels. */ *max_mem_alloc_size = 256 * 1024 * 1024; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: if (ret) { uint32_t *max_clock_frequency = ret; *max_clock_frequency = rscreen->info.max_sclk; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: if (ret) { uint32_t *max_compute_units = ret; *max_compute_units = rscreen->info.max_compute_units; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: if (ret) { uint32_t *images_supported = ret; *images_supported = 0; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: break; /* unused */ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: if (ret) { uint32_t *subgroup_size = ret; *subgroup_size = r600_wavefront_size(rscreen->family); } return sizeof(uint32_t); } fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); return 0; }
/**
 * Create and initialize a radeonsi context for the given screen.
 *
 * The order of the steps below matters: the screen pointer is stored
 * before anything else, and starting the first command stream plus the
 * backend-mask query come after all function tables are set up.
 *
 * \param screen  screen the context belongs to (a si_screen)
 * \param priv    opaque pointer stored in the context's priv field
 * \param flags   PIPE_CONTEXT_* flags; PIPE_CONTEXT_DEBUG is forced on when
 *                the DBG_CHECK_VM debug flag is set on the screen
 * \return the embedded pipe_context on success; NULL if the context could
 *         not be allocated or if any checked initialization step failed
 *         (in the latter case si_destroy_context() is called on the
 *         partially initialized context)
 */
static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                              void *priv, unsigned flags)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	struct radeon_winsys *ws = sscreen->b.ws;
	LLVMTargetRef r600_target;
	const char *triple = "amdgcn--";
	int shader, i;

	if (!sctx)
		return NULL;

	if (sscreen->b.debug_flags & DBG_CHECK_VM)
		flags |= PIPE_CONTEXT_DEBUG;

	sctx->b.b.screen = screen; /* this must be set first */
	sctx->b.b.priv = priv;
	sctx->b.b.destroy = si_destroy_context;
	sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;

	if (!r600_common_context_init(&sctx->b, &sscreen->b))
		goto fail;

	/* The reset-status hook is only installed for DRM major version 3. */
	if (sscreen->b.info.drm_major == 3)
		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;

	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_cp_dma_functions(sctx);
	si_init_debug_functions(sctx);

	/* Use the UVD video entry points when the device reports UVD,
	 * otherwise the generic vl_* ones. */
	if (sscreen->b.info.has_uvd) {
		sctx->b.b.create_video_codec = si_uvd_create_decoder;
		sctx->b.b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.b.create_video_codec = vl_create_decoder;
		sctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	/* NOTE(review): the result of cs_create() is not checked here. */
	sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
	                               si_context_gfx_flush, sctx);

	/* Const IBs are set up only when the winsys provides the hook and
	 * DBG_NO_CE is not set; the preamble IB is additionally optional. */
	if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) {
		sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
		if (!sctx->ce_ib)
			goto fail;

		if (ws->cs_add_const_preamble_ib) {
			sctx->ce_preamble_ib =
			           ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);

			if (!sctx->ce_preamble_ib)
				goto fail;
		}

		sctx->ce_suballocator =
		        u_suballocator_create(&sctx->b.b, 1024 * 1024,
		                              64, PIPE_BIND_CUSTOM,
		                              PIPE_USAGE_DEFAULT, FALSE);
		if (!sctx->ce_suballocator)
			goto fail;
	}

	sctx->b.gfx.flush = si_context_gfx_flush;

	/* Border colors: a CPU-side table, plus a buffer of the same size
	 * that is mapped for writing. */
	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
	                                  sizeof(*sctx->border_color_table));
	if (!sctx->border_color_table)
		goto fail;

	sctx->border_color_buffer = (struct r600_resource*)
		pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
		                   SI_MAX_BORDER_COLORS *
		                   sizeof(*sctx->border_color_table));
	if (!sctx->border_color_buffer)
		goto fail;

	sctx->border_color_map =
		ws->buffer_map(sctx->border_color_buffer->buf,
		               NULL, PIPE_TRANSFER_WRITE);
	if (!sctx->border_color_map)
		goto fail;

	si_init_all_descriptors(sctx);
	si_init_state_functions(sctx);
	si_init_shader_functions(sctx);

	/* CIK and newer use the cik_ SDMA functions, older chips the si_ ones. */
	if (sctx->b.chip_class >= CIK)
		cik_init_sdma_functions(sctx);
	else
		si_init_dma_functions(sctx);

	/* DBG_FORCE_DMA routes resource_copy_region through dma_copy. */
	if (sscreen->b.debug_flags & DBG_FORCE_DMA)
		sctx->b.b.resource_copy_region = sctx->b.dma_copy;

	sctx->blitter = util_blitter_create(&sctx->b.b);
	if (sctx->blitter == NULL)
		goto fail;
	sctx->blitter->draw_rectangle = r600_draw_rectangle;

	sctx->sample_mask.sample_mask = 0xffff;

	/* these must be last */
	si_begin_new_cs(sctx);
	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK) {
		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
		                                                 PIPE_USAGE_DEFAULT, 16);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		/* Bind the dummy buffer to every constant-buffer slot of
		 * every shader stage. */
		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
				                              &sctx->null_const_buf);
			}
		}

		/* Clear the NULL constant buffer, because loads should return zeros. */
		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
		                     sctx->null_const_buf.buffer->width0, 0,
		                     R600_COHERENCY_SHADER);
	}

	/* XXX: This is the maximum value allowed.  I'm not sure how to compute
	 * this for non-cs shaders.  Using the wrong value here can result in
	 * GPU lockups, but the maximum value seems to always work.
	 */
	sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units;

	/* Initialize LLVM TargetMachine.  With LLVM >= 3.8 and DBG_SI_SCHED
	 * set, "+si-scheduler" is added to the feature string. */
	r600_target = radeon_llvm_get_r600_target(triple);
	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
	                                   r600_get_llvm_processor_name(sscreen->b.family),
#if HAVE_LLVM >= 0x0308
	                                   sscreen->b.debug_flags & DBG_SI_SCHED ?
	                                        "+DumpCode,+vgpr-spilling,+si-scheduler" :
#endif
	                                        "+DumpCode,+vgpr-spilling",
	                                   LLVMCodeGenLevelDefault,
	                                   LLVMRelocDefault,
	                                   LLVMCodeModelDefault);

	return &sctx->b.b;
fail:
	/* Common error exit: report the failure, then destroy whatever has
	 * been set up so far. */
	fprintf(stderr, "radeonsi: Failed to create a context.\n");
	si_destroy_context(&sctx->b.b);
	return NULL;
}
static int r600_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir ir_type, enum pipe_compute_cap param, void *ret) { struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; //TODO: select these params by asic switch (param) { case PIPE_COMPUTE_CAP_IR_TARGET: { const char *gpu; const char *triple = "r600--"; gpu = r600_get_llvm_processor_name(rscreen->family); if (ret) { sprintf(ret, "%s-%s", gpu, triple); } /* +2 for dash and terminating NIL byte */ return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); } case PIPE_COMPUTE_CAP_GRID_DIMENSION: if (ret) { uint64_t *grid_dimension = ret; grid_dimension[0] = 3; } return 1 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: if (ret) { uint64_t *grid_size = ret; grid_size[0] = 65535; grid_size[1] = 65535; grid_size[2] = 65535; } return 3 * sizeof(uint64_t) ; case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: if (ret) { uint64_t *block_size = ret; unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type); block_size[0] = threads_per_block; block_size[1] = threads_per_block; block_size[2] = threads_per_block; } return 3 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: if (ret) { uint64_t *max_threads_per_block = ret; *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type); } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_ADDRESS_BITS: if (ret) { uint32_t *address_bits = ret; address_bits[0] = 32; } return 1 * sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: if (ret) { uint64_t *max_global_size = ret; uint64_t max_mem_alloc_size; r600_get_compute_param(screen, ir_type, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_mem_alloc_size); /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least * 1/4 of the MAX_GLOBAL_SIZE. Since the * MAX_MEM_ALLOC_SIZE is fixed for older kernels, * make sure we never report more than * 4 * MAX_MEM_ALLOC_SIZE. 
*/ *max_global_size = MIN2(4 * max_mem_alloc_size, MAX2(rscreen->info.gart_size, rscreen->info.vram_size)); } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: if (ret) { uint64_t *max_local_size = ret; /* Value reported by the closed source driver. */ *max_local_size = 32768; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: if (ret) { uint64_t *max_input_size = ret; /* Value reported by the closed source driver. */ *max_input_size = 1024; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: if (ret) { uint64_t *max_mem_alloc_size = ret; *max_mem_alloc_size = rscreen->info.max_alloc_size; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: if (ret) { uint32_t *max_clock_frequency = ret; *max_clock_frequency = rscreen->info.max_shader_clock; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: if (ret) { uint32_t *max_compute_units = ret; *max_compute_units = rscreen->info.num_good_compute_units; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: if (ret) { uint32_t *images_supported = ret; *images_supported = 0; } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: break; /* unused */ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: if (ret) { uint32_t *subgroup_size = ret; *subgroup_size = r600_wavefront_size(rscreen->family); } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: if (ret) { uint64_t *max_variable_threads_per_block = ret; *max_variable_threads_per_block = 0; } return sizeof(uint64_t); } fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); return 0; }
static int r600_get_compute_param(struct pipe_screen *screen, enum pipe_compute_cap param, void *ret) { struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; //TODO: select these params by asic switch (param) { case PIPE_COMPUTE_CAP_IR_TARGET: { const char *gpu = r600_get_llvm_processor_name(rscreen->family); if (ret) { sprintf(ret, "%s-r600--", gpu); } return (8 + strlen(gpu)) * sizeof(char); } case PIPE_COMPUTE_CAP_GRID_DIMENSION: if (ret) { uint64_t *grid_dimension = ret; grid_dimension[0] = 3; } return 1 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: if (ret) { uint64_t *grid_size = ret; grid_size[0] = 65535; grid_size[1] = 65535; grid_size[2] = 1; } return 3 * sizeof(uint64_t) ; case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: if (ret) { uint64_t *block_size = ret; block_size[0] = 256; block_size[1] = 256; block_size[2] = 256; } return 3 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: if (ret) { uint64_t *max_threads_per_block = ret; *max_threads_per_block = 256; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: if (ret) { uint64_t *max_global_size = ret; /* XXX: This is what the proprietary driver reports, we * may want to use a different value. */ /* XXX: Not sure what to put here for SI. */ if (rscreen->chip_class >= SI) *max_global_size = 2000000000; else *max_global_size = 201326592; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: if (ret) { uint64_t *max_local_size = ret; /* Value reported by the closed source driver. */ *max_local_size = 32768; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: if (ret) { uint64_t *max_input_size = ret; /* Value reported by the closed source driver. 
*/ *max_input_size = 1024; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: if (ret) { uint64_t max_global_size; uint64_t *max_mem_alloc_size = ret; r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, &max_global_size); /* OpenCL requres this value be at least * max(MAX_GLOBAL_SIZE / 4, 128 * 1024 *1024) * I'm really not sure what value to report here, but * MAX_GLOBAL_SIZE / 4 seems resonable. */ *max_mem_alloc_size = max_global_size / 4; } return sizeof(uint64_t); default: fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); return 0; } }
static int r600_get_compute_param(struct pipe_screen *screen, enum pipe_compute_cap param, void *ret) { struct r600_screen *rscreen = (struct r600_screen *)screen; //TODO: select these params by asic switch (param) { case PIPE_COMPUTE_CAP_IR_TARGET: { const char *gpu = r600_get_llvm_processor_name(rscreen->b.family); if (ret) { sprintf(ret, "%s-r600--", gpu); } return (8 + strlen(gpu)) * sizeof(char); } case PIPE_COMPUTE_CAP_GRID_DIMENSION: if (ret) { uint64_t * grid_dimension = ret; grid_dimension[0] = 3; } return 1 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: if (ret) { uint64_t * grid_size = ret; grid_size[0] = 65535; grid_size[1] = 65535; grid_size[2] = 1; } return 3 * sizeof(uint64_t) ; case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: if (ret) { uint64_t * block_size = ret; block_size[0] = 256; block_size[1] = 256; block_size[2] = 256; } return 3 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: if (ret) { uint64_t * max_threads_per_block = ret; *max_threads_per_block = 256; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: if (ret) { uint64_t *max_global_size = ret; /* XXX: Not sure what to put here. */ *max_global_size = 2000000000; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: if (ret) { uint64_t *max_local_size = ret; /* Value reported by the closed source driver. */ *max_local_size = 32768; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: if (ret) { uint64_t *max_input_size = ret; /* Value reported by the closed source driver. */ *max_input_size = 1024; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: if (ret) { uint64_t max_global_size; uint64_t *max_mem_alloc_size = ret; r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, &max_global_size); *max_mem_alloc_size = max_global_size / 4; } return sizeof(uint64_t); default: fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); return 0; } }
/**
 * Create and initialize a radeonsi context for the given screen.
 *
 * The order of the steps below matters: the screen pointer is stored
 * before anything else, and starting the first command stream plus the
 * backend-mask query come after all function tables are set up.
 *
 * \param screen  screen the context belongs to (a si_screen)
 * \param priv    opaque pointer stored in the context's priv field
 * \return the embedded pipe_context on success; NULL if the context could
 *         not be allocated or if any checked initialization step failed
 *         (in the latter case si_destroy_context() is called on the
 *         partially initialized context)
 */
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	struct radeon_winsys *ws = sscreen->b.ws;
	LLVMTargetRef r600_target;
#if HAVE_LLVM >= 0x0306
	const char *triple = "amdgcn--";
#endif
	int shader, i;

	if (sctx == NULL)
		return NULL;

	sctx->b.b.screen = screen; /* this must be set first */
	sctx->b.b.priv = priv;
	sctx->b.b.destroy = si_destroy_context;
	sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */

	if (!r600_common_context_init(&sctx->b, &sscreen->b))
		goto fail;

	/* The reset-status hook is only installed for DRM major version 3. */
	if (sscreen->b.info.drm_major == 3)
		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;

	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_cp_dma_functions(sctx);

	/* Use the UVD video entry points when the device reports UVD,
	 * otherwise the generic vl_* ones. */
	if (sscreen->b.info.has_uvd) {
		sctx->b.b.create_video_codec = si_uvd_create_decoder;
		sctx->b.b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.b.create_video_codec = vl_create_decoder;
		sctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	/* The last argument is the trace buffer's cs_buf when the screen has
	 * a trace_bo, NULL otherwise.
	 * NOTE(review): the result of cs_create() is not checked here. */
	sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
					     sctx, sscreen->b.trace_bo ?
						sscreen->b.trace_bo->cs_buf : NULL);
	sctx->b.rings.gfx.flush = si_context_gfx_flush;

	si_init_all_descriptors(sctx);

	/* Initialize cache_flush and the MSAA atoms: each atom is copied into
	 * the context and then registered by address in atoms.s. */
	sctx->cache_flush = si_atom_cache_flush;
	sctx->atoms.s.cache_flush = &sctx->cache_flush;

	sctx->msaa_sample_locs = si_atom_msaa_sample_locs;
	sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs;

	sctx->msaa_config = si_atom_msaa_config;
	sctx->atoms.s.msaa_config = &sctx->msaa_config;

	/* The streamout atoms live in the common context and are only
	 * referenced from here. */
	sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
	sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;

	si_init_state_functions(sctx);
	si_init_shader_functions(sctx);

	/* DBG_FORCE_DMA routes resource_copy_region through dma_copy. */
	if (sscreen->b.debug_flags & DBG_FORCE_DMA)
		sctx->b.b.resource_copy_region = sctx->b.dma_copy;

	sctx->blitter = util_blitter_create(&sctx->b.b);
	if (sctx->blitter == NULL)
		goto fail;
	sctx->blitter->draw_rectangle = r600_draw_rectangle;

	/* these must be last */
	si_begin_new_cs(sctx);
	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK) {
		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
								 PIPE_USAGE_DEFAULT, 16);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		/* Bind the dummy buffer to every constant-buffer slot of
		 * every shader stage. */
		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
							      &sctx->null_const_buf);
			}
		}

		/* Clear the NULL constant buffer, because loads should return zeros. */
		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
				     sctx->null_const_buf.buffer->width0, 0, false);
	}

	/* XXX: This is the maximum value allowed.  I'm not sure how to compute
	 * this for non-cs shaders.  Using the wrong value here can result in
	 * GPU lockups, but the maximum value seems to always work.
	 */
	sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;

#if HAVE_LLVM >= 0x0306
	/* Initialize LLVM TargetMachine (only when built against
	 * LLVM >= 3.6; otherwise sctx->tm is not assigned here). */
	r600_target = radeon_llvm_get_r600_target(triple);
	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
					   r600_get_llvm_processor_name(sscreen->b.family),
					   "+DumpCode,+vgpr-spilling",
					   LLVMCodeGenLevelDefault,
					   LLVMRelocDefault,
					   LLVMCodeModelDefault);
#endif

	return &sctx->b.b;
fail:
	/* Common error exit: destroy whatever has been set up so far. */
	si_destroy_context(&sctx->b.b);
	return NULL;
}