void r600_upload_const_buffer(struct r600_context *rctx, struct si_resource **rbuffer,
                              const uint8_t *ptr, unsigned size,
                              uint32_t *const_offset)
{
    *rbuffer = NULL;

    if (R600_BIG_ENDIAN) {
        uint32_t *tmpPtr;
        unsigned i;

        if (!(tmpPtr = malloc(size))) {
            R600_ERR("Failed to allocate BE swap buffer.\n");
            return;
        }

        for (i = 0; i < size / 4; ++i) {
            tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);
        }

        u_upload_data(rctx->uploader, 0, size, tmpPtr, const_offset,
                      (struct pipe_resource**)rbuffer);

        free(tmpPtr);
    } else {
        u_upload_data(rctx->uploader, 0, size, ptr, const_offset,
                      (struct pipe_resource**)rbuffer);
    }
}
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
                                                struct pipe_resource *texture,
                                                struct pipe_subresource sr,
                                                unsigned usage,
                                                const struct pipe_box *box)
{
    struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
    struct pipe_resource resource;
    struct r600_transfer *trans;

    trans = CALLOC_STRUCT(r600_transfer);
    if (trans == NULL)
        return NULL;
    pipe_resource_reference(&trans->transfer.resource, texture);
    trans->transfer.sr = sr;
    trans->transfer.usage = usage;
    trans->transfer.box = *box;
    trans->transfer.stride = rtex->pitch[sr.level];
    trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);

    if (rtex->tilled && !rtex->depth) {
        resource.target = PIPE_TEXTURE_2D;
        resource.format = texture->format;
        resource.width0 = box->width;
        resource.height0 = box->height;
        resource.depth0 = 0;
        resource.last_level = 0;
        resource.nr_samples = 0;
        resource.usage = PIPE_USAGE_DYNAMIC;
        resource.bind = 0;
        resource.flags = 0;
        /* For texture reading, the temporary (detiled) texture is used as
         * a render target when blitting from a tiled texture. */
        if (usage & PIPE_TRANSFER_READ) {
            resource.bind |= PIPE_BIND_RENDER_TARGET;
        }
        /* For texture writing, the temporary texture is used as a sampler
         * when blitting into a tiled texture. */
        if (usage & PIPE_TRANSFER_WRITE) {
            resource.bind |= PIPE_BIND_SAMPLER_VIEW;
        }
        /* Create the temporary texture. */
        trans->linear_texture = ctx->screen->resource_create(ctx->screen, &resource);
        if (trans->linear_texture == NULL) {
            R600_ERR("failed to create temporary texture to hold untiled copy\n");
            pipe_resource_reference(&trans->transfer.resource, NULL);
            FREE(trans);
            return NULL;
        }

        if (usage & PIPE_TRANSFER_READ) {
            /* We cannot map a tiled texture directly because the data is
             * in a different order, therefore we do detiling using a blit. */
            r600_copy_from_tiled_texture(ctx, trans);
            /* Always referenced in the blit. */
            ctx->flush(ctx, 0, NULL);
        }
    }
    return &trans->transfer;
}
static void evergreen_launch_grid(struct pipe_context *ctx_,
                                  const uint *block_layout, const uint *grid_layout,
                                  uint32_t pc, const void *input)
{
    struct r600_context *ctx = (struct r600_context *)ctx_;
#ifdef HAVE_OPENCL
    struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
    boolean use_kill;

#if HAVE_LLVM < 0x0306
    struct r600_kernel *kernel = &shader->kernels[pc];
    (void)use_kill;
    if (!kernel->code_bo) {
        void *p;
        struct r600_bytecode *bc = &kernel->bc;
        LLVMModuleRef mod = kernel->llvm_module;
        boolean use_kill = false;
        bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
        unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
        unsigned sb_disasm = use_sb ||
            (ctx->screen->b.debug_flags & DBG_SB_DISASM);

        r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
                           ctx->screen->has_compressed_msaa_texturing);
        bc->type = TGSI_PROCESSOR_COMPUTE;
        bc->isa = ctx->isa;
        r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump, &ctx->b.debug);

        if (dump && !sb_disasm) {
            r600_bytecode_disasm(bc);
        } else if ((dump && sb_disasm) || use_sb) {
            if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
                R600_ERR("r600_sb_bytecode_process failed!\n");
        }

        kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
                                                         kernel->bc.ndw * 4);
        p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
        memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
        ctx->b.ws->buffer_unmap(kernel->code_bo->buf);
    }
    shader->active_kernel = kernel;
    ctx->cs_shader_state.kernel_index = pc;
#else
    ctx->cs_shader_state.pc = pc;
    /* Get the config information for this kernel. */
    r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
#endif
#endif

    COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);

    evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
    compute_emit_cs(ctx, block_layout, grid_layout);
}
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
                                        struct r600_texture *rtex)
{
    unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);

    if (!htile_size)
        return;

    rtex->htile_buffer = (struct r600_resource*)
                         pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
                                            PIPE_USAGE_DEFAULT, htile_size);
    if (rtex->htile_buffer == NULL) {
        /* this is not a fatal error as we can still keep rendering
         * without htile buffer */
        R600_ERR("Failed to create buffer object for htile buffer.\n");
    } else {
        r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
                                 htile_size, 0);
    }
}
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
                                     struct pipe_resource *texture,
                                     struct r600_resource_texture **staging)
{
    struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
    struct pipe_resource resource;
    struct r600_resource_texture **flushed_depth_texture = staging ?
            staging : &rtex->flushed_depth_texture;

    if (!staging && rtex->flushed_depth_texture)
        return true; /* it's ready */

    resource.target = texture->target;
    resource.format = texture->format;
    resource.width0 = texture->width0;
    resource.height0 = texture->height0;
    resource.depth0 = texture->depth0;
    resource.array_size = texture->array_size;
    resource.last_level = texture->last_level;
    resource.nr_samples = texture->nr_samples;
    resource.usage = staging ? PIPE_USAGE_DYNAMIC : PIPE_USAGE_DEFAULT;
    resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
    resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;

    if (staging)
        resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
    else
        rtex->dirty_db_mask = (1 << (resource.last_level+1)) - 1;

    *flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource);
    if (*flushed_depth_texture == NULL) {
        R600_ERR("failed to create temporary texture to hold flushed depth\n");
        return false;
    }

    (*flushed_depth_texture)->is_flushing_texture = TRUE;
    return true;
}
int r600_texture_depth_flush(struct pipe_context *ctx,
                             struct pipe_resource *texture, boolean just_create)
{
    struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
    struct pipe_resource resource;

    if (rtex->flushed_depth_texture)
        goto out;

    resource.target = texture->target;
    resource.format = texture->format;
    resource.width0 = texture->width0;
    resource.height0 = texture->height0;
    resource.depth0 = texture->depth0;
    resource.array_size = texture->array_size;
    resource.last_level = texture->last_level;
    resource.nr_samples = texture->nr_samples;
    resource.usage = PIPE_USAGE_DYNAMIC;
    resource.bind = texture->bind | PIPE_BIND_DEPTH_STENCIL;
    resource.flags = R600_RESOURCE_FLAG_TRANSFER | texture->flags;

    rtex->flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource);
    if (rtex->flushed_depth_texture == NULL) {
        R600_ERR("failed to create temporary texture to hold untiled copy\n");
        return -ENOMEM;
    }

    ((struct r600_resource_texture *)rtex->flushed_depth_texture)->is_flushing_texture = TRUE;
out:
    if (just_create)
        return 0;

    /* XXX: only do this if the depth texture has actually changed: */
    si_blit_uncompress_depth(ctx, rtex);
    return 0;
}
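/* Note on si_pm4_set_reg() below: it maps a register offset to the matching
 * SET_*_REG PM4 packet (config, SH, context or, on CIK, UCONFIG space) and
 * coalesces writes to consecutive registers. A new packet header is begun
 * only when the opcode changes or the register is not contiguous with the
 * last one written; otherwise the value is simply appended to the packet
 * already being built. */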
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
{
    unsigned opcode;

    if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) {
        opcode = PKT3_SET_CONFIG_REG;
        reg -= SI_CONFIG_REG_OFFSET;

    } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) {
        opcode = PKT3_SET_SH_REG;
        reg -= SI_SH_REG_OFFSET;

    } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) {
        opcode = PKT3_SET_CONTEXT_REG;
        reg -= SI_CONTEXT_REG_OFFSET;

    } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) {
        opcode = PKT3_SET_UCONFIG_REG;
        reg -= CIK_UCONFIG_REG_OFFSET;

    } else {
        R600_ERR("Invalid register offset %08x!\n", reg);
        return;
    }

    reg >>= 2;

    if (opcode != state->last_opcode || reg != (state->last_reg + 1)) {
        si_pm4_cmd_begin(state, opcode);
        si_pm4_cmd_add(state, reg);
    }

    state->last_reg = reg;
    si_pm4_cmd_add(state, val);
    si_pm4_cmd_end(state, false);
}
static unsigned si_conv_pipe_prim(unsigned pprim)
{
    static const unsigned prim_conv[] = {
        [PIPE_PRIM_POINTS]                   = V_008958_DI_PT_POINTLIST,
        [PIPE_PRIM_LINES]                    = V_008958_DI_PT_LINELIST,
        [PIPE_PRIM_LINE_LOOP]                = V_008958_DI_PT_LINELOOP,
        [PIPE_PRIM_LINE_STRIP]               = V_008958_DI_PT_LINESTRIP,
        [PIPE_PRIM_TRIANGLES]                = V_008958_DI_PT_TRILIST,
        [PIPE_PRIM_TRIANGLE_STRIP]           = V_008958_DI_PT_TRISTRIP,
        [PIPE_PRIM_TRIANGLE_FAN]             = V_008958_DI_PT_TRIFAN,
        [PIPE_PRIM_QUADS]                    = V_008958_DI_PT_QUADLIST,
        [PIPE_PRIM_QUAD_STRIP]               = V_008958_DI_PT_QUADSTRIP,
        [PIPE_PRIM_POLYGON]                  = V_008958_DI_PT_POLYGON,
        [PIPE_PRIM_LINES_ADJACENCY]          = ~0,
        [PIPE_PRIM_LINE_STRIP_ADJACENCY]     = ~0,
        [PIPE_PRIM_TRIANGLES_ADJACENCY]      = ~0,
        [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = ~0
    };
    unsigned result = prim_conv[pprim];
    if (result == ~0) {
        R600_ERR("unsupported primitive type %d\n", pprim);
    }
    return result;
}
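/* Note on r600_set_constant_buffer() below: it uploads the user constant
 * buffer through the context uploader and then programs both the ALU
 * const-buffer size/cache registers and the per-shader buffer resource slot,
 * with separate paths for the vertex and fragment stages and for Evergreen
 * versus older chips. */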
void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                              struct pipe_resource *buffer)
{
    struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
    struct r600_resource *rbuffer = r600_resource(buffer);
    struct r600_pipe_resource_state *rstate;
    uint32_t offset;

    /* Note that the state tracker can unbind constant buffers by
     * passing NULL here. */
    if (buffer == NULL) {
        return;
    }

    r600_upload_const_buffer(rctx, &rbuffer, &offset);

    switch (shader) {
    case PIPE_SHADER_VERTEX:
        rctx->vs_const_buffer.nregs = 0;
        r600_pipe_state_add_reg(&rctx->vs_const_buffer,
                                R_028180_ALU_CONST_BUFFER_SIZE_VS_0 + index * 4,
                                ALIGN_DIVUP(buffer->width0 >> 4, 16),
                                0xFFFFFFFF, NULL, 0);
        r600_pipe_state_add_reg(&rctx->vs_const_buffer,
                                R_028980_ALU_CONST_CACHE_VS_0 + index * 4,
                                offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
        r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);

        rstate = &rctx->vs_const_buffer_resource[index];
        if (!rstate->id) {
            if (rctx->chip_class >= EVERGREEN) {
                evergreen_pipe_init_buffer_resource(rctx, rstate);
            } else {
                r600_pipe_init_buffer_resource(rctx, rstate);
            }
        }

        if (rctx->chip_class >= EVERGREEN) {
            evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
            evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
        } else {
            r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
            r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
        }
        break;
    case PIPE_SHADER_FRAGMENT:
        rctx->ps_const_buffer.nregs = 0;
        r600_pipe_state_add_reg(&rctx->ps_const_buffer,
                                R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
                                ALIGN_DIVUP(buffer->width0 >> 4, 16),
                                0xFFFFFFFF, NULL, 0);
        r600_pipe_state_add_reg(&rctx->ps_const_buffer,
                                R_028940_ALU_CONST_CACHE_PS_0,
                                offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
        r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);

        rstate = &rctx->ps_const_buffer_resource[index];
        if (!rstate->id) {
            if (rctx->chip_class >= EVERGREEN) {
                evergreen_pipe_init_buffer_resource(rctx, rstate);
            } else {
                r600_pipe_init_buffer_resource(rctx, rstate);
            }
        }

        if (rctx->chip_class >= EVERGREEN) {
            evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
            evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
        } else {
            r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
            r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
        }
        break;
    default:
        R600_ERR("unsupported %d\n", shader);
        return;
    }

    if (buffer != &rbuffer->b.b.b)
        pipe_resource_reference((struct pipe_resource**)&rbuffer, NULL);
}
static struct pipe_context *r600_create_context(struct pipe_screen *screen,
                                                void *priv, unsigned flags)
{
    struct r600_context *rctx = CALLOC_STRUCT(r600_context);
    struct r600_screen* rscreen = (struct r600_screen *)screen;
    struct radeon_winsys *ws = rscreen->b.ws;

    if (!rctx)
        return NULL;

    rctx->b.b.screen = screen;
    assert(!priv);
    rctx->b.b.priv = NULL; /* for threaded_context_unwrap_sync */
    rctx->b.b.destroy = r600_destroy_context;
    rctx->b.set_atom_dirty = (void *)r600_set_atom_dirty;

    if (!r600_common_context_init(&rctx->b, &rscreen->b, flags))
        goto fail;

    rctx->screen = rscreen;
    LIST_INITHEAD(&rctx->texture_buffers);

    r600_init_blit_functions(rctx);

    if (rscreen->b.info.has_hw_decode) {
        rctx->b.b.create_video_codec = r600_uvd_create_decoder;
        rctx->b.b.create_video_buffer = r600_video_buffer_create;
    } else {
        rctx->b.b.create_video_codec = vl_create_decoder;
        rctx->b.b.create_video_buffer = vl_video_buffer_create;
    }

    if (getenv("R600_TRACE"))
        rctx->is_debug = true;

    r600_init_common_state_functions(rctx);

    switch (rctx->b.chip_class) {
    case R600:
    case R700:
        r600_init_state_functions(rctx);
        r600_init_atom_start_cs(rctx);
        rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx);
        rctx->custom_blend_resolve = rctx->b.chip_class == R700 ? r700_create_resolve_blend(rctx)
                                                                : r600_create_resolve_blend(rctx);
        rctx->custom_blend_decompress = r600_create_decompress_blend(rctx);
        rctx->has_vertex_cache = !(rctx->b.family == CHIP_RV610 ||
                                   rctx->b.family == CHIP_RV620 ||
                                   rctx->b.family == CHIP_RS780 ||
                                   rctx->b.family == CHIP_RS880 ||
                                   rctx->b.family == CHIP_RV710);
        break;
    case EVERGREEN:
    case CAYMAN:
        evergreen_init_state_functions(rctx);
        evergreen_init_atom_start_cs(rctx);
        evergreen_init_atom_start_compute_cs(rctx);
        rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
        rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx);
        rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx);
        rctx->custom_blend_fastclear = evergreen_create_fastclear_blend(rctx);
        rctx->has_vertex_cache = !(rctx->b.family == CHIP_CEDAR ||
                                   rctx->b.family == CHIP_PALM ||
                                   rctx->b.family == CHIP_SUMO ||
                                   rctx->b.family == CHIP_SUMO2 ||
                                   rctx->b.family == CHIP_CAICOS ||
                                   rctx->b.family == CHIP_CAYMAN ||
                                   rctx->b.family == CHIP_ARUBA);
        break;
    default:
        R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class);
        goto fail;
    }

    rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
                                   r600_context_gfx_flush, rctx);
    rctx->b.gfx.flush = r600_context_gfx_flush;

    rctx->allocator_fetch_shader =
        u_suballocator_create(&rctx->b.b, 64 * 1024,
                              0, PIPE_USAGE_DEFAULT, 0, FALSE);
    if (!rctx->allocator_fetch_shader)
        goto fail;

    rctx->isa = calloc(1, sizeof(struct r600_isa));
    if (!rctx->isa || r600_isa_init(rctx, rctx->isa))
        goto fail;

    if (rscreen->b.debug_flags & DBG_FORCE_DMA)
        rctx->b.b.resource_copy_region = rctx->b.dma_copy;

    rctx->blitter = util_blitter_create(&rctx->b.b);
    if (rctx->blitter == NULL)
        goto fail;
    util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
    rctx->blitter->draw_rectangle = r600_draw_rectangle;

    r600_begin_new_cs(rctx);

    rctx->dummy_pixel_shader =
        util_make_fragment_cloneinput_shader(&rctx->b.b, 0,
                                             TGSI_SEMANTIC_GENERIC,
                                             TGSI_INTERPOLATE_CONSTANT);
    rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);

    return &rctx->b.b;

fail:
    r600_destroy_context(&rctx->b.b);
    return NULL;
}
static void *r600_texture_transfer_map(struct pipe_context *ctx,
                                       struct pipe_resource *texture,
                                       unsigned level,
                                       unsigned usage,
                                       const struct pipe_box *box,
                                       struct pipe_transfer **ptransfer)
{
    struct r600_common_context *rctx = (struct r600_common_context*)ctx;
    struct r600_texture *rtex = (struct r600_texture*)texture;
    struct r600_transfer *trans;
    boolean use_staging_texture = FALSE;
    struct r600_resource *buf;
    unsigned offset = 0;
    char *map;

    /* We cannot map a tiled texture directly because the data is
     * in a different order, therefore we do detiling using a blit.
     *
     * Also, use a temporary in GTT memory for read transfers, as
     * the CPU is much happier reading out of cached system memory
     * than uncached VRAM.
     */
    if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
        use_staging_texture = TRUE;
    } else if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_MAP_DIRECTLY) &&
               (rtex->resource.domains == RADEON_DOMAIN_VRAM)) {
        /* Untiled buffers in VRAM, which is slow for CPU reads */
        use_staging_texture = TRUE;
    } else if (!(usage & PIPE_TRANSFER_READ) &&
               (r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
                !rctx->ws->buffer_wait(rtex->resource.buf, 0, RADEON_USAGE_READWRITE))) {
        /* Use a staging texture for uploads if the underlying BO is busy. */
        use_staging_texture = TRUE;
    }

    if (texture->flags & R600_RESOURCE_FLAG_TRANSFER) {
        use_staging_texture = FALSE;
    }

    if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
        return NULL;
    }

    trans = CALLOC_STRUCT(r600_transfer);
    if (trans == NULL)
        return NULL;
    trans->transfer.resource = texture;
    trans->transfer.level = level;
    trans->transfer.usage = usage;
    trans->transfer.box = *box;

    if (rtex->is_depth) {
        struct r600_texture *staging_depth;

        if (rtex->resource.b.b.nr_samples > 1) {
            /* MSAA depth buffers need to be converted to single sample buffers.
             *
             * Mapping MSAA depth buffers can occur if ReadPixels is called
             * with a multisample GLX visual.
             *
             * First downsample the depth buffer to a temporary texture,
             * then decompress the temporary one to staging.
             *
             * Only the region being mapped is transferred.
             */
            struct pipe_resource resource;

            r600_init_temp_resource_from_box(&resource, texture, box, level, 0);

            if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
                R600_ERR("failed to create temporary texture to hold untiled copy\n");
                FREE(trans);
                return NULL;
            }

            if (usage & PIPE_TRANSFER_READ) {
                struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);

                r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
                rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
                                            0, 0, 0, box->depth, 0, 0);
                pipe_resource_reference((struct pipe_resource**)&temp, NULL);
            }
        } else {
            /* XXX: only readback the rectangle which is being mapped? */
            /* XXX: when discard is true, no need to read back from depth texture */
            if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
                R600_ERR("failed to create temporary texture to hold untiled copy\n");
                FREE(trans);
                return NULL;
            }

            rctx->blit_decompress_depth(ctx, rtex, staging_depth,
                                        level, level,
                                        box->z, box->z + box->depth - 1,
                                        0, 0);

            offset = r600_texture_get_offset(staging_depth, level, box);
        }

        trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
        trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
        trans->staging = (struct r600_resource*)staging_depth;
    } else if (use_staging_texture) {
        struct pipe_resource resource;
        struct r600_texture *staging;

        r600_init_temp_resource_from_box(&resource, texture, box, level,
                                         R600_RESOURCE_FLAG_TRANSFER);
        resource.usage = (usage & PIPE_TRANSFER_READ) ?
            PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;

        /* Create the temporary texture. */
        staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
        if (staging == NULL) {
            R600_ERR("failed to create temporary texture to hold untiled copy\n");
            FREE(trans);
            return NULL;
        }
        trans->staging = &staging->resource;
        trans->transfer.stride = staging->surface.level[0].pitch_bytes;
        trans->transfer.layer_stride = staging->surface.level[0].slice_size;
        if (usage & PIPE_TRANSFER_READ) {
            r600_copy_to_staging_texture(ctx, trans);
        }
    } else {
        /* the resource is mapped directly */
        trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
        trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
        offset = r600_texture_get_offset(rtex, level, box);
    }

    if (trans->staging) {
        buf = trans->staging;
        if (!rtex->is_depth && !(usage & PIPE_TRANSFER_READ))
            usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
    } else {
        buf = &rtex->resource;
    }

    if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
        pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
        FREE(trans);
        return NULL;
    }

    *ptransfer = &trans->transfer;
    return map + offset;
}
/* The number of samples can be specified independently of the texture. */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
                                 struct r600_texture *rtex,
                                 unsigned nr_samples,
                                 struct r600_fmask_info *out)
{
    /* FMASK is allocated like an ordinary texture. */
    struct radeon_surf fmask = rtex->surface;

    memset(out, 0, sizeof(*out));

    fmask.bo_alignment = 0;
    fmask.bo_size = 0;
    fmask.nsamples = 1;
    fmask.flags |= RADEON_SURF_FMASK;

    /* Force 2D tiling if it wasn't set. This may occur when creating
     * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
     * destination buffer must have an FMASK too. */
    fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
    fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);

    if (rscreen->chip_class >= SI) {
        fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
    }

    switch (nr_samples) {
    case 2:
    case 4:
        fmask.bpe = 1;
        if (rscreen->chip_class <= CAYMAN) {
            fmask.bankh = 4;
        }
        break;
    case 8:
        fmask.bpe = 4;
        break;
    default:
        R600_ERR("Invalid sample count for FMASK allocation.\n");
        return;
    }

    /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
     * This can be fixed by writing a separate FMASK allocator specifically
     * for R600-R700 asics. */
    if (rscreen->chip_class <= R700) {
        fmask.bpe *= 2;
    }

    if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
        R600_ERR("Got error in surface_init while allocating FMASK.\n");
        return;
    }

    assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);

    out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
    if (out->slice_tile_max)
        out->slice_tile_max -= 1;

    out->tile_mode_index = fmask.tiling_index[0];
    out->pitch = fmask.level[0].nblk_x;
    out->bank_height = fmask.bankh;
    out->alignment = MAX2(256, fmask.bo_alignment);
    out->size = fmask.bo_size;
}
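/* Note on eg_bytecode_cf_build() below: it encodes one Evergreen/Cayman
 * control-flow instruction into the bytecode stream, two dwords per CF
 * entry (four when ALU_EXTENDED is prepended for more than two kcache
 * sets), with separate encodings for ALU clauses, TEX/VTX clauses,
 * exports, streamout buffer writes and flow-control instructions. */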
int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
{
    unsigned id = cf->id;

    switch (cf->inst) {
    case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
    case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
    case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
    case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
        /* prepend ALU_EXTENDED if we need more than 2 kcache sets */
        if (cf->eg_alu_extended) {
            bc->bytecode[id++] =
                S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE0(V_SQ_CF_INDEX_NONE) |
                S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE1(V_SQ_CF_INDEX_NONE) |
                S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE2(V_SQ_CF_INDEX_NONE) |
                S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK_INDEX_MODE3(V_SQ_CF_INDEX_NONE) |
                S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK2(cf->kcache[2].bank) |
                S_SQ_CF_ALU_WORD0_EXT_KCACHE_BANK3(cf->kcache[3].bank) |
                S_SQ_CF_ALU_WORD0_EXT_KCACHE_MODE2(cf->kcache[2].mode);
            bc->bytecode[id++] = EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_EXTENDED |
                S_SQ_CF_ALU_WORD1_EXT_KCACHE_MODE3(cf->kcache[3].mode) |
                S_SQ_CF_ALU_WORD1_EXT_KCACHE_ADDR2(cf->kcache[2].addr) |
                S_SQ_CF_ALU_WORD1_EXT_KCACHE_ADDR3(cf->kcache[3].addr) |
                S_SQ_CF_ALU_WORD1_EXT_BARRIER(1);
        }
        bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
            S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
            S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
            S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
        bc->bytecode[id++] = cf->inst |
            S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
            S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
            S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
            S_SQ_CF_ALU_WORD1_BARRIER(1) |
            S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
        break;
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
        bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
        bc->bytecode[id++] = cf->inst |
            S_SQ_CF_WORD1_BARRIER(1) |
            S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
        break;
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
        bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
            S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
            S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
            S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
        bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
            S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
            S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
            S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
            S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
            S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
            cf->output.inst;
        if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
            bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
        id++;
        break;
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2:
    case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3:
        bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
            S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
            S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
            S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
        bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
            S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
            cf->output.inst |
            S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
            S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
        if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
            bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
        id++;
        break;
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
    case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
    case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
        bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
        bc->bytecode[id++] = cf->inst |
            S_SQ_CF_WORD1_BARRIER(1) |
            S_SQ_CF_WORD1_COND(cf->cond) |
            S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
        break;
    case CF_NATIVE:
        bc->bytecode[id++] = cf->isa[0];
        bc->bytecode[id++] = cf->isa[1];
        break;
    default:
        R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
        return -EINVAL;
    }
    return 0;
}
static struct pipe_transfer* si_texture_get_transfer(struct pipe_context *ctx,
                                                     struct pipe_resource *texture,
                                                     unsigned level,
                                                     unsigned usage,
                                                     const struct pipe_box *box)
{
    struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
    struct pipe_resource resource;
    struct r600_transfer *trans;
    int r;
    boolean use_staging_texture = FALSE;

    /* We cannot map a tiled texture directly because the data is
     * in a different order, therefore we do detiling using a blit.
     *
     * Also, use a temporary in GTT memory for read transfers, as
     * the CPU is much happier reading out of cached system memory
     * than uncached VRAM.
     */
    if (rtex->surface.level[level].mode != RADEON_SURF_MODE_LINEAR_ALIGNED &&
        rtex->surface.level[level].mode != RADEON_SURF_MODE_LINEAR)
        use_staging_texture = TRUE;

    if ((usage & PIPE_TRANSFER_READ) && u_box_volume(box) > 1024)
        use_staging_texture = TRUE;

    /* XXX: Use a staging texture for uploads if the underlying BO
     * is busy. No interface for checking that currently, so do
     * it eagerly whenever the transfer doesn't require a readback
     * and might block.
     */
    if ((usage & PIPE_TRANSFER_WRITE) &&
        !(usage & (PIPE_TRANSFER_READ |
                   PIPE_TRANSFER_DONTBLOCK |
                   PIPE_TRANSFER_UNSYNCHRONIZED)))
        use_staging_texture = TRUE;

    if (!permit_hardware_blit(ctx->screen, texture) ||
        (texture->flags & R600_RESOURCE_FLAG_TRANSFER))
        use_staging_texture = FALSE;

    if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY))
        return NULL;

    trans = CALLOC_STRUCT(r600_transfer);
    if (trans == NULL)
        return NULL;
    pipe_resource_reference(&trans->transfer.resource, texture);
    trans->transfer.level = level;
    trans->transfer.usage = usage;
    trans->transfer.box = *box;

    if (rtex->depth) {
        /* XXX: only readback the rectangle which is being mapped? */
        /* XXX: when discard is true, no need to read back from depth texture */
        r = r600_texture_depth_flush(ctx, texture, FALSE);
        if (r < 0) {
            R600_ERR("failed to create temporary texture to hold untiled copy\n");
            pipe_resource_reference(&trans->transfer.resource, NULL);
            FREE(trans);
            return NULL;
        }
        trans->transfer.stride = rtex->flushed_depth_texture->surface.level[level].pitch_bytes;
        trans->offset = r600_texture_get_offset(rtex->flushed_depth_texture, level, box->z);
        return &trans->transfer;
    } else if (use_staging_texture) {
        resource.target = PIPE_TEXTURE_2D;
        resource.format = texture->format;
        resource.width0 = box->width;
        resource.height0 = box->height;
        resource.depth0 = 1;
        resource.array_size = 1;
        resource.last_level = 0;
        resource.nr_samples = 0;
        resource.usage = PIPE_USAGE_STAGING;
        resource.bind = 0;
        resource.flags = R600_RESOURCE_FLAG_TRANSFER;
        /* For texture reading, the temporary (detiled) texture is used as
         * a render target when blitting from a tiled texture. */
        if (usage & PIPE_TRANSFER_READ) {
            resource.bind |= PIPE_BIND_RENDER_TARGET;
        }
        /* For texture writing, the temporary texture is used as a sampler
         * when blitting into a tiled texture. */
        if (usage & PIPE_TRANSFER_WRITE) {
            resource.bind |= PIPE_BIND_SAMPLER_VIEW;
        }
        /* Create the temporary texture. */
        trans->staging_texture = ctx->screen->resource_create(ctx->screen, &resource);
        if (trans->staging_texture == NULL) {
            R600_ERR("failed to create temporary texture to hold untiled copy\n");
            pipe_resource_reference(&trans->transfer.resource, NULL);
            FREE(trans);
            return NULL;
        }

        trans->transfer.stride = ((struct r600_resource_texture *)trans->staging_texture)
                                 ->surface.level[0].pitch_bytes;
        if (usage & PIPE_TRANSFER_READ) {
            r600_copy_to_staging_texture(ctx, trans);
            /* Always referenced in the blit. */
            radeonsi_flush(ctx, NULL, 0);
        }
        return &trans->transfer;
    }

    trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
    trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
    trans->offset = r600_texture_get_offset(rtex, level, box->z);
    return &trans->transfer;
}
static void *si_texture_transfer_map(struct pipe_context *ctx,
                                     struct pipe_resource *texture,
                                     unsigned level,
                                     unsigned usage,
                                     const struct pipe_box *box,
                                     struct pipe_transfer **ptransfer)
{
    struct r600_context *rctx = (struct r600_context *)ctx;
    struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
    struct r600_transfer *trans;
    boolean use_staging_texture = FALSE;
    struct radeon_winsys_cs_handle *buf;
    enum pipe_format format = texture->format;
    unsigned offset = 0;
    char *map;

    /* We cannot map a tiled texture directly because the data is
     * in a different order, therefore we do detiling using a blit.
     *
     * Also, use a temporary in GTT memory for read transfers, as
     * the CPU is much happier reading out of cached system memory
     * than uncached VRAM.
     */
    if (rtex->surface.level[level].mode != RADEON_SURF_MODE_LINEAR_ALIGNED &&
        rtex->surface.level[level].mode != RADEON_SURF_MODE_LINEAR)
        use_staging_texture = TRUE;

    /* XXX: Use a staging texture for uploads if the underlying BO
     * is busy. No interface for checking that currently, so do
     * it eagerly whenever the transfer doesn't require a readback
     * and might block.
     */
    if ((usage & PIPE_TRANSFER_WRITE) &&
        !(usage & (PIPE_TRANSFER_READ |
                   PIPE_TRANSFER_DONTBLOCK |
                   PIPE_TRANSFER_UNSYNCHRONIZED)))
        use_staging_texture = TRUE;

    if (texture->flags & R600_RESOURCE_FLAG_TRANSFER)
        use_staging_texture = FALSE;

    if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY))
        return NULL;

    trans = CALLOC_STRUCT(r600_transfer);
    if (trans == NULL)
        return NULL;
    pipe_resource_reference(&trans->transfer.resource, texture);
    trans->transfer.level = level;
    trans->transfer.usage = usage;
    trans->transfer.box = *box;

    if (rtex->is_depth) {
        /* XXX: only readback the rectangle which is being mapped? */
        /* XXX: when discard is true, no need to read back from depth texture */
        struct r600_resource_texture *staging_depth;

        if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
            R600_ERR("failed to create temporary texture to hold untiled copy\n");
            pipe_resource_reference(&trans->transfer.resource, NULL);
            FREE(trans);
            return NULL;
        }

        si_blit_uncompress_depth(ctx, rtex, staging_depth,
                                 level, level,
                                 box->z, box->z + box->depth - 1);

        trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
        trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
        trans->offset = r600_texture_get_offset(staging_depth, level, box->z);

        trans->staging = &staging_depth->resource.b.b;
    } else if (use_staging_texture) {
        struct pipe_resource resource;
        struct r600_resource_texture *staging;

        memset(&resource, 0, sizeof(resource));
        resource.format = texture->format;
        resource.width0 = box->width;
        resource.height0 = box->height;
        resource.depth0 = 1;
        resource.array_size = 1;
        resource.usage = PIPE_USAGE_STAGING;
        resource.flags = R600_RESOURCE_FLAG_TRANSFER;

        /* We must set the correct texture target and dimensions if needed for a 3D transfer. */
        if (box->depth > 1 && util_max_layer(texture, level) > 0)
            resource.target = texture->target;
        else
            resource.target = PIPE_TEXTURE_2D;

        switch (resource.target) {
        case PIPE_TEXTURE_1D_ARRAY:
        case PIPE_TEXTURE_2D_ARRAY:
        case PIPE_TEXTURE_CUBE_ARRAY:
            resource.array_size = box->depth;
            break;
        case PIPE_TEXTURE_3D:
            resource.depth0 = box->depth;
            break;
        default:;
        }

        /* Create the temporary texture. */
        staging = (struct r600_resource_texture*)ctx->screen->resource_create(ctx->screen, &resource);
        if (staging == NULL) {
            R600_ERR("failed to create temporary texture to hold untiled copy\n");
            pipe_resource_reference(&trans->transfer.resource, NULL);
            FREE(trans);
            return NULL;
        }

        trans->staging = &staging->resource.b.b;
        trans->transfer.stride = staging->surface.level[0].pitch_bytes;
        trans->transfer.layer_stride = staging->surface.level[0].slice_size;
        if (usage & PIPE_TRANSFER_READ) {
            r600_copy_to_staging_texture(ctx, trans);
            /* Always referenced in the blit. */
            radeonsi_flush(ctx, NULL, 0);
        }
    } else {
        trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
        trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
        trans->offset = r600_texture_get_offset(rtex, level, box->z);
    }

    if (trans->staging) {
        buf = si_resource(trans->staging)->cs_buf;
    } else {
        buf = rtex->resource.cs_buf;
    }

    if (rtex->is_depth || !trans->staging)
        offset = trans->offset +
            box->y / util_format_get_blockheight(format) * trans->transfer.stride +
            box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);

    if (!(map = rctx->ws->buffer_map(buf, rctx->cs, usage))) {
        pipe_resource_reference(&trans->staging, NULL);
        pipe_resource_reference(&trans->transfer.resource, NULL);
        FREE(trans);
        return NULL;
    }

    *ptransfer = &trans->transfer;
    return map + offset;
}
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv)
{
    struct si_context *sctx = CALLOC_STRUCT(si_context);
    struct si_screen* sscreen = (struct si_screen *)screen;
    struct radeon_winsys *ws = sscreen->b.ws;
    int shader, i;

    if (sctx == NULL)
        return NULL;

    sctx->b.b.screen = screen; /* this must be set first */
    sctx->b.b.priv = priv;
    sctx->b.b.destroy = si_destroy_context;
    sctx->screen = sscreen; /* Easy accessing of screen/winsys. */

    if (!r600_common_context_init(&sctx->b, &sscreen->b))
        goto fail;

    si_init_blit_functions(sctx);
    si_init_compute_functions(sctx);

    if (sscreen->b.info.has_uvd) {
        sctx->b.b.create_video_codec = si_uvd_create_decoder;
        sctx->b.b.create_video_buffer = si_video_buffer_create;
    } else {
        sctx->b.b.create_video_codec = vl_create_decoder;
        sctx->b.b.create_video_buffer = vl_video_buffer_create;
    }

    sctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, si_context_gfx_flush, sctx, NULL);
    sctx->b.rings.gfx.flush = si_context_gfx_flush;

    si_init_all_descriptors(sctx);

    /* Initialize cache_flush. */
    sctx->cache_flush = si_atom_cache_flush;
    sctx->atoms.s.cache_flush = &sctx->cache_flush;

    sctx->msaa_config = si_atom_msaa_config;
    sctx->atoms.s.msaa_config = &sctx->msaa_config;

    sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
    sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;

    switch (sctx->b.chip_class) {
    case SI:
    case CIK:
        si_init_state_functions(sctx);
        si_init_config(sctx);
        break;
    default:
        R600_ERR("Unsupported chip class %d.\n", sctx->b.chip_class);
        goto fail;
    }

    sctx->blitter = util_blitter_create(&sctx->b.b);
    if (sctx->blitter == NULL)
        goto fail;
    sctx->blitter->draw_rectangle = r600_draw_rectangle;

    sctx->dummy_pixel_shader =
        util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
                                             TGSI_SEMANTIC_GENERIC,
                                             TGSI_INTERPOLATE_CONSTANT);
    sctx->b.b.bind_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);

    /* these must be last */
    si_begin_new_cs(sctx);
    r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

    /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
     * with a NULL buffer). We need to use a dummy buffer instead. */
    if (sctx->b.chip_class == CIK) {
        sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
                                                         PIPE_USAGE_DEFAULT, 16);
        sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

        for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
            for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
                sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
                                              &sctx->null_const_buf);
            }
        }

        /* Clear the NULL constant buffer, because loads should return zeros. */
        sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
                             sctx->null_const_buf.buffer->width0, 0);
    }

    return &sctx->b.b;
fail:
    si_destroy_context(&sctx->b.b);
    return NULL;
}
static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
{
    struct r600_context *rctx = CALLOC_STRUCT(r600_context);
    struct r600_screen* rscreen = (struct r600_screen *)screen;
    struct radeon_winsys *ws = rscreen->b.ws;

    if (rctx == NULL)
        return NULL;

    rctx->b.b.screen = screen;
    rctx->b.b.priv = priv;
    rctx->b.b.destroy = r600_destroy_context;

    if (!r600_common_context_init(&rctx->b, &rscreen->b))
        goto fail;

    rctx->screen = rscreen;
    rctx->keep_tiling_flags = rscreen->b.info.drm_minor >= 12;

    r600_init_blit_functions(rctx);

    if (rscreen->b.info.has_uvd) {
        rctx->b.b.create_video_codec = r600_uvd_create_decoder;
        rctx->b.b.create_video_buffer = r600_video_buffer_create;
    } else {
        rctx->b.b.create_video_codec = vl_create_decoder;
        rctx->b.b.create_video_buffer = vl_video_buffer_create;
    }

    r600_init_common_state_functions(rctx);

    switch (rctx->b.chip_class) {
    case R600:
    case R700:
        r600_init_state_functions(rctx);
        r600_init_atom_start_cs(rctx);
        rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx);
        rctx->custom_blend_resolve = rctx->b.chip_class == R700 ? r700_create_resolve_blend(rctx)
                                                                : r600_create_resolve_blend(rctx);
        rctx->custom_blend_decompress = r600_create_decompress_blend(rctx);
        rctx->has_vertex_cache = !(rctx->b.family == CHIP_RV610 ||
                                   rctx->b.family == CHIP_RV620 ||
                                   rctx->b.family == CHIP_RS780 ||
                                   rctx->b.family == CHIP_RS880 ||
                                   rctx->b.family == CHIP_RV710);
        break;
    case EVERGREEN:
    case CAYMAN:
        evergreen_init_state_functions(rctx);
        evergreen_init_atom_start_cs(rctx);
        evergreen_init_atom_start_compute_cs(rctx);
        rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
        rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx);
        rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx);
        rctx->custom_blend_fastclear = evergreen_create_fastclear_blend(rctx);
        rctx->has_vertex_cache = !(rctx->b.family == CHIP_CEDAR ||
                                   rctx->b.family == CHIP_PALM ||
                                   rctx->b.family == CHIP_SUMO ||
                                   rctx->b.family == CHIP_SUMO2 ||
                                   rctx->b.family == CHIP_CAICOS ||
                                   rctx->b.family == CHIP_CAYMAN ||
                                   rctx->b.family == CHIP_ARUBA);
        break;
    default:
        R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class);
        goto fail;
    }

    rctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, r600_context_gfx_flush, rctx,
                                         rscreen->b.trace_bo ? rscreen->b.trace_bo->cs_buf : NULL);
    rctx->b.rings.gfx.flush = r600_context_gfx_flush;

    rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256,
                                                         0, PIPE_USAGE_DEFAULT, FALSE);
    if (!rctx->allocator_fetch_shader)
        goto fail;

    rctx->isa = calloc(1, sizeof(struct r600_isa));
    if (!rctx->isa || r600_isa_init(rctx, rctx->isa))
        goto fail;

    rctx->blitter = util_blitter_create(&rctx->b.b);
    if (rctx->blitter == NULL)
        goto fail;
    util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
    rctx->blitter->draw_rectangle = r600_draw_rectangle;

    r600_begin_new_cs(rctx);
    r600_query_init_backend_mask(&rctx->b); /* this emits commands and must be last */

    rctx->dummy_pixel_shader =
        util_make_fragment_cloneinput_shader(&rctx->b.b, 0,
                                             TGSI_SEMANTIC_GENERIC,
                                             TGSI_INTERPOLATE_CONSTANT);
    rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);

    return &rctx->b.b;

fail:
    r600_destroy_context(&rctx->b.b);
    return NULL;
}
static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
{
    struct r600_context *rctx = CALLOC_STRUCT(r600_context);
    struct r600_screen* rscreen = (struct r600_screen *)screen;
    int shader, i;

    if (rctx == NULL)
        return NULL;

    if (!r600_common_context_init(&rctx->b, &rscreen->b))
        goto fail;

    rctx->b.b.screen = screen;
    rctx->b.b.priv = priv;
    rctx->b.b.destroy = r600_destroy_context;
    rctx->b.b.flush = r600_flush_from_st;

    /* Easy accessing of screen/winsys. */
    rctx->screen = rscreen;

    si_init_blit_functions(rctx);
    r600_init_query_functions(rctx);
    r600_init_context_resource_functions(rctx);
    si_init_compute_functions(rctx);

    if (rscreen->b.info.has_uvd) {
        rctx->b.b.create_video_codec = radeonsi_uvd_create_decoder;
        rctx->b.b.create_video_buffer = radeonsi_video_buffer_create;
    } else {
        rctx->b.b.create_video_codec = vl_create_decoder;
        rctx->b.b.create_video_buffer = vl_video_buffer_create;
    }

    rctx->b.rings.gfx.cs = rctx->b.ws->cs_create(rctx->b.ws, RING_GFX, NULL);
    rctx->b.rings.gfx.flush = r600_flush_from_winsys;

    si_init_all_descriptors(rctx);

    /* Initialize cache_flush. */
    rctx->cache_flush = si_atom_cache_flush;
    rctx->atoms.cache_flush = &rctx->cache_flush;

    rctx->atoms.streamout_begin = &rctx->b.streamout.begin_atom;

    switch (rctx->b.chip_class) {
    case SI:
    case CIK:
        si_init_state_functions(rctx);
        LIST_INITHEAD(&rctx->active_nontimer_query_list);
        rctx->max_db = 8;
        si_init_config(rctx);
        break;
    default:
        R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class);
        goto fail;
    }

    rctx->b.ws->cs_set_flush_callback(rctx->b.rings.gfx.cs, r600_flush_from_winsys, rctx);

    util_slab_create(&rctx->pool_transfers,
                     sizeof(struct pipe_transfer), 64,
                     UTIL_SLAB_SINGLETHREADED);

    rctx->uploader = u_upload_create(&rctx->b.b, 1024 * 1024, 256,
                                     PIPE_BIND_INDEX_BUFFER |
                                     PIPE_BIND_CONSTANT_BUFFER);
    if (!rctx->uploader)
        goto fail;

    rctx->blitter = util_blitter_create(&rctx->b.b);
    if (rctx->blitter == NULL)
        goto fail;

    rctx->dummy_pixel_shader =
        util_make_fragment_cloneinput_shader(&rctx->b.b, 0,
                                             TGSI_SEMANTIC_GENERIC,
                                             TGSI_INTERPOLATE_CONSTANT);
    rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);

    /* these must be last */
    si_begin_new_cs(rctx);
    si_get_backend_mask(rctx);

    /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
     * with a NULL buffer). We need to use a dummy buffer instead. */
    if (rctx->b.chip_class == CIK) {
        rctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
                                                         PIPE_USAGE_STATIC, 16);
        rctx->null_const_buf.buffer_size = rctx->null_const_buf.buffer->width0;

        for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
            for (i = 0; i < NUM_CONST_BUFFERS; i++) {
                rctx->b.b.set_constant_buffer(&rctx->b.b, shader, i,
                                              &rctx->null_const_buf);
            }
        }

        /* Clear the NULL constant buffer, because loads should return zeros. */
        rctx->b.clear_buffer(&rctx->b.b, rctx->null_const_buf.buffer, 0,
                             rctx->null_const_buf.buffer->width0, 0);
    }

    return &rctx->b.b;
fail:
    r600_destroy_context(&rctx->b.b);
    return NULL;
}