static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) { struct r600_context *rctx = CALLOC_STRUCT(r600_context); struct r600_screen* rscreen = (struct r600_screen *)screen; struct radeon_winsys *ws = rscreen->b.ws; if (rctx == NULL) return NULL; rctx->b.b.screen = screen; rctx->b.b.priv = priv; rctx->b.b.destroy = r600_destroy_context; if (!r600_common_context_init(&rctx->b, &rscreen->b)) goto fail; rctx->screen = rscreen; rctx->keep_tiling_flags = rscreen->b.info.drm_minor >= 12; r600_init_blit_functions(rctx); if (rscreen->b.info.has_uvd) { rctx->b.b.create_video_codec = r600_uvd_create_decoder; rctx->b.b.create_video_buffer = r600_video_buffer_create; } else { rctx->b.b.create_video_codec = vl_create_decoder; rctx->b.b.create_video_buffer = vl_video_buffer_create; } r600_init_common_state_functions(rctx); switch (rctx->b.chip_class) { case R600: case R700: r600_init_state_functions(rctx); r600_init_atom_start_cs(rctx); rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); rctx->custom_blend_resolve = rctx->b.chip_class == R700 ? r700_create_resolve_blend(rctx) : r600_create_resolve_blend(rctx); rctx->custom_blend_decompress = r600_create_decompress_blend(rctx); rctx->has_vertex_cache = !(rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV620 || rctx->b.family == CHIP_RS780 || rctx->b.family == CHIP_RS880 || rctx->b.family == CHIP_RV710); break; case EVERGREEN: case CAYMAN: evergreen_init_state_functions(rctx); evergreen_init_atom_start_cs(rctx); evergreen_init_atom_start_compute_cs(rctx); rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx); rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx); rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx); rctx->custom_blend_fastclear = evergreen_create_fastclear_blend(rctx); rctx->has_vertex_cache = !(rctx->b.family == CHIP_CEDAR || rctx->b.family == CHIP_PALM || rctx->b.family == CHIP_SUMO || rctx->b.family == CHIP_SUMO2 || rctx->b.family == CHIP_CAICOS || rctx->b.family == CHIP_CAYMAN || rctx->b.family == CHIP_ARUBA); break; default: R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class); goto fail; } rctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, r600_context_gfx_flush, rctx, rscreen->b.trace_bo ? rscreen->b.trace_bo->cs_buf : NULL); rctx->b.rings.gfx.flush = r600_context_gfx_flush; rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256, 0, PIPE_USAGE_DEFAULT, FALSE); if (!rctx->allocator_fetch_shader) goto fail; rctx->isa = calloc(1, sizeof(struct r600_isa)); if (!rctx->isa || r600_isa_init(rctx, rctx->isa)) goto fail; rctx->blitter = util_blitter_create(&rctx->b.b); if (rctx->blitter == NULL) goto fail; util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa); rctx->blitter->draw_rectangle = r600_draw_rectangle; r600_begin_new_cs(rctx); r600_query_init_backend_mask(&rctx->b); /* this emits commands and must be last */ rctx->dummy_pixel_shader = util_make_fragment_cloneinput_shader(&rctx->b.b, 0, TGSI_SEMANTIC_GENERIC, TGSI_INTERPOLATE_CONSTANT); rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader); return &rctx->b.b; fail: r600_destroy_context(&rctx->b.b); return NULL; }
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv) { struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->b.ws; int shader, i; if (sctx == NULL) return NULL; sctx->b.b.screen = screen; /* this must be set first */ sctx->b.b.priv = priv; sctx->b.b.destroy = si_destroy_context; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ if (!r600_common_context_init(&sctx->b, &sscreen->b)) goto fail; si_init_blit_functions(sctx); si_init_compute_functions(sctx); if (sscreen->b.info.has_uvd) { sctx->b.b.create_video_codec = si_uvd_create_decoder; sctx->b.b.create_video_buffer = si_video_buffer_create; } else { sctx->b.b.create_video_codec = vl_create_decoder; sctx->b.b.create_video_buffer = vl_video_buffer_create; } sctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, si_context_gfx_flush, sctx, NULL); sctx->b.rings.gfx.flush = si_context_gfx_flush; si_init_all_descriptors(sctx); /* Initialize cache_flush. */ sctx->cache_flush = si_atom_cache_flush; sctx->atoms.s.cache_flush = &sctx->cache_flush; sctx->msaa_config = si_atom_msaa_config; sctx->atoms.s.msaa_config = &sctx->msaa_config; sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom; sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom; switch (sctx->b.chip_class) { case SI: case CIK: si_init_state_functions(sctx); si_init_config(sctx); break; default: R600_ERR("Unsupported chip class %d.\n", sctx->b.chip_class); goto fail; } sctx->blitter = util_blitter_create(&sctx->b.b); if (sctx->blitter == NULL) goto fail; sctx->blitter->draw_rectangle = r600_draw_rectangle; sctx->dummy_pixel_shader = util_make_fragment_cloneinput_shader(&sctx->b.b, 0, TGSI_SEMANTIC_GENERIC, TGSI_INTERPOLATE_CONSTANT); sctx->b.b.bind_fs_state(&sctx->b.b, sctx->dummy_pixel_shader); /* these must be last */ si_begin_new_cs(sctx); r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ if (sctx->b.chip_class == CIK) { sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT, 16); sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; for (shader = 0; shader < SI_NUM_SHADERS; shader++) { for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) { sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i, &sctx->null_const_buf); } } /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, 0); } return &sctx->b.b; fail: si_destroy_context(&sctx->b.b); return NULL; }
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->b.ws; int shader, i; if (!sctx) return NULL; if (sscreen->b.debug_flags & DBG_CHECK_VM) flags |= PIPE_CONTEXT_DEBUG; if (flags & PIPE_CONTEXT_DEBUG) sscreen->record_llvm_ir = true; /* racy but not critical */ sctx->b.b.screen = screen; /* this must be set first */ sctx->b.b.priv = priv; sctx->b.b.destroy = si_destroy_context; sctx->b.b.emit_string_marker = si_emit_string_marker; sctx->b.set_atom_dirty = (void *)si_set_atom_dirty; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; if (!r600_common_context_init(&sctx->b, &sscreen->b)) goto fail; if (sscreen->b.info.drm_major == 3) sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status; si_init_blit_functions(sctx); si_init_compute_functions(sctx); si_init_cp_dma_functions(sctx); si_init_debug_functions(sctx); if (sscreen->b.info.has_uvd) { sctx->b.b.create_video_codec = si_uvd_create_decoder; sctx->b.b.create_video_buffer = si_video_buffer_create; } else { sctx->b.b.create_video_codec = vl_create_decoder; sctx->b.b.create_video_buffer = vl_video_buffer_create; } sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, sctx); if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) { sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs); if (!sctx->ce_ib) goto fail; if (ws->cs_add_const_preamble_ib) { sctx->ce_preamble_ib = ws->cs_add_const_preamble_ib(sctx->b.gfx.cs); if (!sctx->ce_preamble_ib) goto fail; } sctx->ce_suballocator = u_suballocator_create(&sctx->b.b, 1024 * 1024, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, false); if (!sctx->ce_suballocator) goto fail; } sctx->b.gfx.flush = si_context_gfx_flush; /* Border colors. */ sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table)); if (!sctx->border_color_table) goto fail; sctx->border_color_buffer = (struct r600_resource*) pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table)); if (!sctx->border_color_buffer) goto fail; sctx->border_color_map = ws->buffer_map(sctx->border_color_buffer->buf, NULL, PIPE_TRANSFER_WRITE); if (!sctx->border_color_map) goto fail; si_init_all_descriptors(sctx); si_init_state_functions(sctx); si_init_shader_functions(sctx); if (sctx->b.chip_class >= CIK) cik_init_sdma_functions(sctx); else si_init_dma_functions(sctx); if (sscreen->b.debug_flags & DBG_FORCE_DMA) sctx->b.b.resource_copy_region = sctx->b.dma_copy; sctx->blitter = util_blitter_create(&sctx->b.b); if (sctx->blitter == NULL) goto fail; sctx->blitter->draw_rectangle = r600_draw_rectangle; sctx->sample_mask.sample_mask = 0xffff; /* these must be last */ si_begin_new_cs(sctx); r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ if (sctx->b.chip_class == CIK) { sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT, 16); if (!sctx->null_const_buf.buffer) goto fail; sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; for (shader = 0; shader < SI_NUM_SHADERS; shader++) { for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) { sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i, &sctx->null_const_buf); } } /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, 0, R600_COHERENCY_SHADER); } uint64_t max_threads_per_block; screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, &max_threads_per_block); /* The maximum number of scratch waves. Scratch space isn't divided * evenly between CUs. The number is only a function of the number of CUs. * We can decrease the constant to decrease the scratch buffer size. * * sctx->scratch_waves must be >= the maximum posible size of * 1 threadgroup, so that the hw doesn't hang from being unable * to start any. * * The recommended value is 4 per CU at most. Higher numbers don't * bring much benefit, but they still occupy chip resources (think * async compute). I've seen ~2% performance difference between 4 and 32. */ sctx->scratch_waves = MAX2(32 * sscreen->b.info.num_good_compute_units, max_threads_per_block / 64); sctx->tm = si_create_llvm_target_machine(sscreen); return &sctx->b.b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); si_destroy_context(&sctx->b.b); return NULL; }
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->b.ws; LLVMTargetRef r600_target; const char *triple = "amdgcn--"; int shader, i; if (!sctx) return NULL; if (sscreen->b.debug_flags & DBG_CHECK_VM) flags |= PIPE_CONTEXT_DEBUG; sctx->b.b.screen = screen; /* this must be set first */ sctx->b.b.priv = priv; sctx->b.b.destroy = si_destroy_context; sctx->b.set_atom_dirty = (void *)si_set_atom_dirty; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; if (!r600_common_context_init(&sctx->b, &sscreen->b)) goto fail; if (sscreen->b.info.drm_major == 3) sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status; si_init_blit_functions(sctx); si_init_compute_functions(sctx); si_init_cp_dma_functions(sctx); si_init_debug_functions(sctx); if (sscreen->b.info.has_uvd) { sctx->b.b.create_video_codec = si_uvd_create_decoder; sctx->b.b.create_video_buffer = si_video_buffer_create; } else { sctx->b.b.create_video_codec = vl_create_decoder; sctx->b.b.create_video_buffer = vl_video_buffer_create; } sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, sctx); if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) { sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs); if (!sctx->ce_ib) goto fail; if (ws->cs_add_const_preamble_ib) { sctx->ce_preamble_ib = ws->cs_add_const_preamble_ib(sctx->b.gfx.cs); if (!sctx->ce_preamble_ib) goto fail; } sctx->ce_suballocator = u_suballocator_create(&sctx->b.b, 1024 * 1024, 64, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, FALSE); if (!sctx->ce_suballocator) goto fail; } sctx->b.gfx.flush = si_context_gfx_flush; /* Border colors. */ sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table)); if (!sctx->border_color_table) goto fail; sctx->border_color_buffer = (struct r600_resource*) pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT, SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table)); if (!sctx->border_color_buffer) goto fail; sctx->border_color_map = ws->buffer_map(sctx->border_color_buffer->buf, NULL, PIPE_TRANSFER_WRITE); if (!sctx->border_color_map) goto fail; si_init_all_descriptors(sctx); si_init_state_functions(sctx); si_init_shader_functions(sctx); if (sctx->b.chip_class >= CIK) cik_init_sdma_functions(sctx); else si_init_dma_functions(sctx); if (sscreen->b.debug_flags & DBG_FORCE_DMA) sctx->b.b.resource_copy_region = sctx->b.dma_copy; sctx->blitter = util_blitter_create(&sctx->b.b); if (sctx->blitter == NULL) goto fail; sctx->blitter->draw_rectangle = r600_draw_rectangle; sctx->sample_mask.sample_mask = 0xffff; /* these must be last */ si_begin_new_cs(sctx); r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ if (sctx->b.chip_class == CIK) { sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT, 16); if (!sctx->null_const_buf.buffer) goto fail; sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; for (shader = 0; shader < SI_NUM_SHADERS; shader++) { for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) { sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i, &sctx->null_const_buf); } } /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, 0, R600_COHERENCY_SHADER); } /* XXX: This is the maximum value allowed. I'm not sure how to compute * this for non-cs shaders. Using the wrong value here can result in * GPU lockups, but the maximum value seems to always work. */ sctx->scratch_waves = 32 * sscreen->b.info.num_good_compute_units; /* Initialize LLVM TargetMachine */ r600_target = radeon_llvm_get_r600_target(triple); sctx->tm = LLVMCreateTargetMachine(r600_target, triple, r600_get_llvm_processor_name(sscreen->b.family), #if HAVE_LLVM >= 0x0308 sscreen->b.debug_flags & DBG_SI_SCHED ? "+DumpCode,+vgpr-spilling,+si-scheduler" : #endif "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); return &sctx->b.b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); si_destroy_context(&sctx->b.b); return NULL; }
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv) { struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->b.ws; LLVMTargetRef r600_target; #if HAVE_LLVM >= 0x0306 const char *triple = "amdgcn--"; #endif int shader, i; if (sctx == NULL) return NULL; sctx->b.b.screen = screen; /* this must be set first */ sctx->b.b.priv = priv; sctx->b.b.destroy = si_destroy_context; sctx->b.set_atom_dirty = (void *)si_set_atom_dirty; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ if (!r600_common_context_init(&sctx->b, &sscreen->b)) goto fail; if (sscreen->b.info.drm_major == 3) sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status; si_init_blit_functions(sctx); si_init_compute_functions(sctx); si_init_cp_dma_functions(sctx); if (sscreen->b.info.has_uvd) { sctx->b.b.create_video_codec = si_uvd_create_decoder; sctx->b.b.create_video_buffer = si_video_buffer_create; } else { sctx->b.b.create_video_codec = vl_create_decoder; sctx->b.b.create_video_buffer = vl_video_buffer_create; } sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, sctx, sscreen->b.trace_bo ? sscreen->b.trace_bo->cs_buf : NULL); sctx->b.rings.gfx.flush = si_context_gfx_flush; si_init_all_descriptors(sctx); /* Initialize cache_flush. */ sctx->cache_flush = si_atom_cache_flush; sctx->atoms.s.cache_flush = &sctx->cache_flush; sctx->msaa_sample_locs = si_atom_msaa_sample_locs; sctx->atoms.s.msaa_sample_locs = &sctx->msaa_sample_locs; sctx->msaa_config = si_atom_msaa_config; sctx->atoms.s.msaa_config = &sctx->msaa_config; sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom; sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom; si_init_state_functions(sctx); si_init_shader_functions(sctx); if (sscreen->b.debug_flags & DBG_FORCE_DMA) sctx->b.b.resource_copy_region = sctx->b.dma_copy; sctx->blitter = util_blitter_create(&sctx->b.b); if (sctx->blitter == NULL) goto fail; sctx->blitter->draw_rectangle = r600_draw_rectangle; /* these must be last */ si_begin_new_cs(sctx); r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ if (sctx->b.chip_class == CIK) { sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT, 16); if (!sctx->null_const_buf.buffer) goto fail; sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; for (shader = 0; shader < SI_NUM_SHADERS; shader++) { for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) { sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i, &sctx->null_const_buf); } } /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, 0, false); } /* XXX: This is the maximum value allowed. I'm not sure how to compute * this for non-cs shaders. Using the wrong value here can result in * GPU lockups, but the maximum value seems to always work. */ sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units; #if HAVE_LLVM >= 0x0306 /* Initialize LLVM TargetMachine */ r600_target = radeon_llvm_get_r600_target(triple); sctx->tm = LLVMCreateTargetMachine(r600_target, triple, r600_get_llvm_processor_name(sscreen->b.family), "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); #endif return &sctx->b.b; fail: si_destroy_context(&sctx->b.b); return NULL; }