static void *
svga_create_gs_state(struct pipe_context *pipe,
                     const struct pipe_shader_state *templ)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_geometry_shader *gs = CALLOC_STRUCT(svga_geometry_shader);

   if (!gs)
      return NULL;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_CREATEGS);

   gs->base.tokens = tgsi_dup_tokens(templ->tokens);

   /* Collect basic info that we'll need later:
    */
   tgsi_scan_shader(gs->base.tokens, &gs->base.info);

   gs->draw_shader = draw_create_geometry_shader(svga->swtnl.draw, templ);

   gs->base.id = svga->debug.shader_id++;

   gs->generic_outputs = svga_get_generic_outputs_mask(&gs->base.info);

   /* check for any stream output declarations */
   if (templ->stream_output.num_outputs) {
      gs->base.stream_output = svga_create_stream_output(svga, &gs->base,
                                                         &templ->stream_output);
   }

   SVGA_STATS_TIME_POP(svga_sws(svga));
   return gs;
}
static boolean
svga_fence_finish(struct pipe_screen *screen,
                  struct pipe_context *ctx,
                  struct pipe_fence_handle *fence,
                  uint64_t timeout)
{
   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
   boolean retVal;

   SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_FENCEFINISH);

   if (!timeout) {
      retVal = sws->fence_signalled(sws, fence, 0) == 0;
   }
   else {
      SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
               __FUNCTION__, fence);

      retVal = sws->fence_finish(sws, fence, 0) == 0;
   }

   SVGA_STATS_TIME_POP(sws);

   return retVal;
}
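/*
 * Illustration (not part of the driver): a zero timeout above makes
 * svga_fence_finish() a non-blocking poll of fence_signalled(), while any
 * non-zero timeout falls through to a blocking winsys fence_finish().  The
 * following is a minimal, hypothetical caller sketch using the generic
 * Gallium pipe_context/pipe_screen hooks; the helper name is made up.
 */
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"

static boolean
flush_and_wait(struct pipe_context *pipe, struct pipe_screen *screen)
{
   struct pipe_fence_handle *fence = NULL;
   boolean signalled;

   /* Kick off pending work and ask for a fence that tracks it. */
   pipe->flush(pipe, &fence, 0);
   if (!fence)
      return TRUE;

   /* A zero timeout would only poll; PIPE_TIMEOUT_INFINITE waits until the
    * fence signals. */
   signalled = screen->fence_finish(screen, NULL, fence, PIPE_TIMEOUT_INFINITE);

   /* Drop our reference to the fence. */
   screen->fence_reference(screen, &fence, NULL);
   return signalled;
}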
static enum pipe_error
retry_draw_range_elements(struct svga_context *svga,
                          struct pipe_resource *index_buffer,
                          unsigned index_size,
                          int index_bias,
                          unsigned min_index,
                          unsigned max_index,
                          enum pipe_prim_type prim,
                          unsigned start,
                          unsigned count,
                          unsigned start_instance,
                          unsigned instance_count,
                          boolean do_retry)
{
   enum pipe_error ret = PIPE_OK;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DRAWELEMENTS);

   svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);

   ret = svga_update_state(svga, SVGA_STATE_HW_DRAW);
   if (ret != PIPE_OK)
      goto retry;

   /* Determine if flatshade is to be used after svga_update_state()
    * in case the fragment shader is changed.
    */
   svga_hwtnl_set_flatshade(svga->hwtnl,
                            svga->curr.rast->templ.flatshade ||
                            svga->state.hw_draw.fs->uses_flat_interp,
                            svga->curr.rast->templ.flatshade_first);

   ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
                                        index_buffer, index_size, index_bias,
                                        min_index, max_index,
                                        prim, start, count,
                                        start_instance, instance_count);
   if (ret != PIPE_OK)
      goto retry;

   goto done;

retry:
   svga_context_flush(svga, NULL);

   if (do_retry) {
      ret = retry_draw_range_elements(svga,
                                      index_buffer, index_size, index_bias,
                                      min_index, max_index,
                                      prim, start, count,
                                      start_instance, instance_count,
                                      FALSE);
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}
/**
 * All drawing filters down into this function, either directly
 * on the hardware path or after doing software vertex processing.
 */
enum pipe_error
svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
                const SVGA3dPrimitiveRange *range,
                unsigned vcount,
                unsigned min_index,
                unsigned max_index,
                struct pipe_resource *ib,
                unsigned start_instance, unsigned instance_count)
{
   enum pipe_error ret = PIPE_OK;

   SVGA_STATS_TIME_PUSH(svga_sws(hwtnl->svga), SVGA_STATS_TIME_HWTNLPRIM);

   if (svga_have_vgpu10(hwtnl->svga)) {
      /* draw immediately */
      ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
                        start_instance, instance_count);
      if (ret != PIPE_OK) {
         svga_context_flush(hwtnl->svga, NULL);
         ret = draw_vgpu10(hwtnl, range, vcount, min_index, max_index, ib,
                           start_instance, instance_count);
         assert(ret == PIPE_OK);
      }
   }
   else {
      /* batch up drawing commands */
#ifdef DEBUG
      check_draw_params(hwtnl, range, min_index, max_index, ib);
      assert(start_instance == 0);
      assert(instance_count <= 1);
#else
      (void) check_draw_params;
#endif

      if (hwtnl->cmd.prim_count + 1 >= QSZ) {
         ret = svga_hwtnl_flush(hwtnl);
         if (ret != PIPE_OK)
            goto done;
      }

      /* min/max indices are relative to bias */
      hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
      hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;

      hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
      hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;

      pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
      hwtnl->cmd.prim_count++;
   }

done:
   SVGA_STATS_TIME_POP(svga_screen(hwtnl->svga->pipe.screen)->sws);
   return ret;
}
/**
 * Fallback to the copy region utility which uses map/memcpy for the copy
 */
static void
copy_region_fallback(struct svga_context *svga,
                     struct pipe_resource *dst_tex, unsigned dst_level,
                     unsigned dstx, unsigned dsty, unsigned dstz,
                     struct pipe_resource *src_tex, unsigned src_level,
                     const struct pipe_box *src_box)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;

   SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_COPYREGIONFALLBACK);
   util_resource_copy_region(&svga->pipe, dst_tex, dst_level, dstx,
                             dsty, dstz, src_tex, src_level, src_box);
   SVGA_STATS_TIME_POP(sws);
   (void) sws;
}
/**
 * Emit any pending drawing commands to the command buffer.
 * When we receive VGPU9 drawing commands we accumulate them and don't
 * immediately emit them into the command buffer.
 * This function needs to be called before we change state that could
 * affect those pending draws.
 */
enum pipe_error
svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
{
   enum pipe_error ret = PIPE_OK;

   SVGA_STATS_TIME_PUSH(svga_sws(hwtnl->svga), SVGA_STATS_TIME_HWTNLFLUSH);

   if (!svga_have_vgpu10(hwtnl->svga) && hwtnl->cmd.prim_count) {
      /* we only queue up primitives for VGPU9 */
      ret = draw_vgpu9(hwtnl);
   }

   SVGA_STATS_TIME_POP(svga_screen(hwtnl->svga->pipe.screen)->sws);
   return ret;
}
static enum pipe_error
retry_draw_arrays(struct svga_context *svga,
                  enum pipe_prim_type prim,
                  unsigned start,
                  unsigned count,
                  unsigned start_instance,
                  unsigned instance_count,
                  boolean do_retry)
{
   enum pipe_error ret;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DRAWARRAYS);

   svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);

   ret = svga_update_state(svga, SVGA_STATE_HW_DRAW);
   if (ret != PIPE_OK)
      goto retry;

   /* Determine if flatshade is to be used after svga_update_state()
    * in case the fragment shader is changed.
    */
   svga_hwtnl_set_flatshade(svga->hwtnl,
                            svga->curr.rast->templ.flatshade ||
                            svga->state.hw_draw.fs->uses_flat_interp,
                            svga->curr.rast->templ.flatshade_first);

   ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count,
                                start_instance, instance_count);
   if (ret != PIPE_OK)
      goto retry;

   goto done;

retry:
   if (ret == PIPE_ERROR_OUT_OF_MEMORY && do_retry) {
      svga_context_flush(svga, NULL);

      ret = retry_draw_arrays(svga, prim, start, count,
                              start_instance, instance_count,
                              FALSE);
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}
static void
svga_buffer_transfer_unmap(struct pipe_context *pipe,
                           struct pipe_transfer *transfer)
{
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_context *svga = svga_context(pipe);
   struct svga_buffer *sbuf = svga_buffer(transfer->resource);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERUNMAP);

   mtx_lock(&ss->swc_mutex);

   assert(sbuf->map.count);
   if (sbuf->map.count) {
      --sbuf->map.count;
   }

   if (svga_buffer_has_hw_storage(sbuf)) {
      /* Note: we may wind up flushing here and unmapping other buffers
       * which leads to recursively locking ss->swc_mutex.
       */
      svga_buffer_hw_storage_unmap(svga, sbuf);
   }

   if (transfer->usage & PIPE_TRANSFER_WRITE) {
      if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
         /*
          * Mapped range not flushed explicitly, so flush the whole buffer,
          * and tell the host to discard the contents when processing the DMA
          * command.
          */
         SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");

         sbuf->dma.flags.discard = TRUE;

         svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0);
      }
   }

   mtx_unlock(&ss->swc_mutex);
   FREE(transfer);
   SVGA_STATS_TIME_POP(svga_sws(svga));
}
enum pipe_error
svga_update_state(struct svga_context *svga, unsigned max_level)
{
   struct svga_screen *screen = svga_screen(svga->pipe.screen);
   enum pipe_error ret = PIPE_OK;
   unsigned i;

   SVGA_STATS_TIME_PUSH(screen->sws, SVGA_STATS_TIME_UPDATESTATE);

   /* Check for updates to bound textures.  This can't be done in an
    * atom as there is no flag which could provoke this test, and we
    * cannot create one.
    */
   if (svga->state.texture_timestamp != screen->texture_timestamp) {
      svga->state.texture_timestamp = screen->texture_timestamp;
      svga->dirty |= SVGA_NEW_TEXTURE;
   }

   for (i = 0; i <= max_level; i++) {
      svga->dirty |= svga->state.dirty[i];

      if (svga->dirty) {
         ret = update_state(svga, state_levels[i], &svga->dirty);
         if (ret != PIPE_OK)
            goto done;

         svga->state.dirty[i] = 0;
      }
   }

   for (; i < SVGA_STATE_MAX; i++)
      svga->state.dirty[i] |= svga->dirty;

   svga->dirty = 0;

   svga->hud.num_validations++;

done:
   SVGA_STATS_TIME_POP(screen->sws);
   return ret;
}
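/*
 * Illustration (not part of the driver): svga_update_state() walks the state
 * levels in order, accumulating dirty bits, clearing each level it validates,
 * and pushing any remaining bits down to the levels it skipped.  Below is a
 * simplified, stand-alone sketch of that cascade; the level count and flag
 * values are illustrative only.
 */
#include <stdio.h>

#define NUM_LEVELS 4          /* stands in for SVGA_STATE_MAX */

static unsigned level_dirty[NUM_LEVELS];   /* per-level pending flags */
static unsigned dirty;                     /* running accumulator */

static void
update_levels(unsigned max_level)
{
   unsigned i;

   for (i = 0; i <= max_level; i++) {
      dirty |= level_dirty[i];             /* pick up this level's flags */
      if (dirty) {
         printf("validate level %u, flags 0x%x\n", i, dirty);
         level_dirty[i] = 0;               /* level i is now clean */
      }
   }

   /* Levels beyond max_level were not validated; remember the flags. */
   for (; i < NUM_LEVELS; i++)
      level_dirty[i] |= dirty;

   dirty = 0;
}

int
main(void)
{
   level_dirty[0] = 0x1;            /* e.g. a binding changed */
   update_levels(1);                /* validate levels 0..1 only */
   update_levels(NUM_LEVELS - 1);   /* levels 2..3 still see flag 0x1 */
   return 0;
}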
static void *
svga_create_vs_state(struct pipe_context *pipe,
                     const struct pipe_shader_state *templ)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader);

   if (!vs)
      return NULL;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_CREATEVS);

   /* substitute a debug shader? */
   vs->base.tokens = tgsi_dup_tokens(substitute_vs(svga->debug.shader_id,
                                                   templ->tokens));

   /* Collect basic info that we'll need later:
    */
   tgsi_scan_shader(vs->base.tokens, &vs->base.info);

   {
      /* Need to construct a new template in case we substituted a
       * debug shader.
       */
      struct pipe_shader_state tmp2 = *templ;
      tmp2.tokens = vs->base.tokens;
      vs->draw_shader = draw_create_vertex_shader(svga->swtnl.draw, &tmp2);
   }

   vs->base.id = svga->debug.shader_id++;

   vs->generic_outputs = svga_get_generic_outputs_mask(&vs->base.info);

   /* check for any stream output declarations */
   if (templ->stream_output.num_outputs) {
      vs->base.stream_output = svga_create_stream_output(svga, &vs->base,
                                                         &templ->stream_output);
   }

   SVGA_STATS_TIME_POP(svga_sws(svga));
   return vs;
}
static void
svga_texture_transfer_unmap(struct pipe_context *pipe,
                            struct pipe_transfer *transfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_winsys_screen *sws = ss->sws;
   struct svga_transfer *st = svga_transfer(transfer);
   struct svga_texture *tex = svga_texture(transfer->resource);

   SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_TEXTRANSFERUNMAP);

   if (!st->use_direct_map) {
      svga_texture_transfer_unmap_dma(svga, st);
   }
   else if (st->upload.buf) {
      svga_texture_transfer_unmap_upload(svga, st);
   }
   else {
      svga_texture_transfer_unmap_direct(svga, st);
   }

   if (st->base.usage & PIPE_TRANSFER_WRITE) {
      svga->hud.num_resource_updates++;

      /* Mark the texture level as dirty */
      ss->texture_timestamp++;
      svga_age_texture_view(tex, transfer->level);
      if (transfer->resource->target == PIPE_TEXTURE_CUBE)
         svga_define_texture_level(tex, st->slice, transfer->level);
      else
         svga_define_texture_level(tex, 0, transfer->level);
   }

   pipe_resource_reference(&st->base.resource, NULL);
   FREE(st);
   SVGA_STATS_TIME_POP(sws);
   (void) sws;
}
/**
 * Request a transfer map to the texture resource
 */
static void *
svga_texture_transfer_map(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          struct pipe_transfer **ptransfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws;
   struct svga_texture *tex = svga_texture(texture);
   struct svga_transfer *st;
   struct svga_winsys_surface *surf = tex->handle;
   boolean use_direct_map = svga_have_gb_objects(svga) &&
                            !svga_have_gb_dma(svga);
   void *map = NULL;
   int64_t begin = svga_get_time(svga);

   SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_TEXTRANSFERMAP);

   if (!surf)
      goto done;

   /* We can't map texture storage directly unless we have GB objects */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      if (svga_have_gb_objects(svga))
         use_direct_map = TRUE;
      else
         goto done;
   }

   st = CALLOC_STRUCT(svga_transfer);
   if (!st)
      goto done;

   st->base.level = level;
   st->base.usage = usage;
   st->base.box = *box;

   /* The modified transfer map box with the array index removed from z.
    * The array index is specified in slice.
    */
   st->box.x = box->x;
   st->box.y = box->y;
   st->box.z = box->z;
   st->box.w = box->width;
   st->box.h = box->height;
   st->box.d = box->depth;

   switch (tex->b.b.target) {
   case PIPE_TEXTURE_CUBE:
      st->slice = st->base.box.z;
      st->box.z = 0;   /* so we don't apply double offsets below */
      break;
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      st->slice = st->base.box.z;
      st->box.z = 0;   /* so we don't apply double offsets below */

      /* Force direct map for transferring multiple slices */
      if (st->base.box.depth > 1)
         use_direct_map = svga_have_gb_objects(svga);

      break;
   default:
      st->slice = 0;
      break;
   }

   /* Force direct map for multisample surface */
   if (texture->nr_samples > 1) {
      assert(svga_have_gb_objects(svga));
      assert(sws->have_sm4_1);
      use_direct_map = TRUE;
   }

   st->use_direct_map = use_direct_map;
   pipe_resource_reference(&st->base.resource, texture);

   /* If this is the first time mapping to the surface in this
    * command buffer, clear the dirty masks of this surface.
    */
   if (sws->surface_is_flushed(sws, surf)) {
      svga_clear_texture_dirty(tex);
   }

   if (!use_direct_map) {
      /* upload to the DMA buffer */
      map = svga_texture_transfer_map_dma(svga, st);
   }
   else {
      boolean can_use_upload = tex->can_use_upload &&
                               !(st->base.usage & PIPE_TRANSFER_READ);
      boolean was_rendered_to =
         svga_was_texture_rendered_to(svga_texture(texture),
                                      st->slice, st->base.level);

      /* If the texture was already rendered to and the upload buffer
       * is supported, then we will use the upload buffer to
       * avoid the need to read back the texture content; otherwise,
       * we'll first try to map directly to the GB surface and, if that is
       * blocked, fall back to the upload buffer.
       */
      if (was_rendered_to && can_use_upload) {
         map = svga_texture_transfer_map_upload(svga, st);
      }
      else {
         unsigned orig_usage = st->base.usage;

         /* First try to map directly to the GB surface */
         if (can_use_upload)
            st->base.usage |= PIPE_TRANSFER_DONTBLOCK;
         map = svga_texture_transfer_map_direct(svga, st);
         st->base.usage = orig_usage;

         if (!map && can_use_upload) {
            /* if direct map with DONTBLOCK fails, then try upload to the
             * texture upload buffer.
             */
            map = svga_texture_transfer_map_upload(svga, st);
         }
      }

      /* If upload fails, then try direct map again without forcing it
       * to DONTBLOCK.
       */
      if (!map) {
         map = svga_texture_transfer_map_direct(svga, st);
      }
   }

   if (!map) {
      FREE(st);
   }
   else {
      *ptransfer = &st->base;
      svga->hud.num_textures_mapped++;
      if (usage & PIPE_TRANSFER_WRITE) {
         /* record texture upload for HUD */
         svga->hud.num_bytes_uploaded +=
            st->base.layer_stride * st->box.d;

         /* mark this texture level as dirty */
         svga_set_texture_dirty(tex, st->slice, level);
      }
   }

done:
   svga->hud.map_buffer_time += (svga_get_time(svga) - begin);
   SVGA_STATS_TIME_POP(sws);
   (void) sws;

   return map;
}
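/*
 * Illustration (not part of the driver): svga_texture_transfer_map() is
 * reached through the generic pipe_context transfer hooks.  A hypothetical
 * caller writing a small region of an RGBA8 mip level might look like the
 * sketch below; the texture, format, and sizes are assumptions made for the
 * example.
 */
#include <string.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "util/u_box.h"

static void
write_4x4_region(struct pipe_context *pipe, struct pipe_resource *tex,
                 const uint8_t *pixels /* 4x4 RGBA8, tightly packed */)
{
   struct pipe_transfer *transfer = NULL;
   struct pipe_box box;
   uint8_t *map;
   unsigned y;

   u_box_2d(0, 0, 4, 4, &box);   /* x, y, width, height */

   map = pipe->transfer_map(pipe, tex, 0 /* level */,
                            PIPE_TRANSFER_WRITE, &box, &transfer);
   if (!map)
      return;

   /* Copy row by row, honoring the stride reported by the driver. */
   for (y = 0; y < 4; y++)
      memcpy(map + y * transfer->stride, pixels + y * 4 * 4, 4 * 4);

   pipe->transfer_unmap(pipe, transfer);
}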
/**
 * Parse TGSI shader and translate to SVGA/DX9 serialized
 * representation.
 *
 * In this function SVGA shader is emitted to an in-memory buffer that
 * can be dynamically grown.  Once we've finished and know how large
 * it is, it will be copied to a hardware buffer for upload.
 */
struct svga_shader_variant *
svga_tgsi_vgpu9_translate(struct svga_context *svga,
                          const struct svga_shader *shader,
                          const struct svga_compile_key *key,
                          enum pipe_shader_type unit)
{
   struct svga_shader_variant *variant = NULL;
   struct svga_shader_emitter emit;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU9TRANSLATE);

   memset(&emit, 0, sizeof(emit));

   emit.size = 1024;
   emit.buf = MALLOC(emit.size);
   if (emit.buf == NULL) {
      goto fail;
   }

   emit.ptr = emit.buf;
   emit.unit = unit;
   emit.key = *key;

   tgsi_scan_shader(shader->tokens, &emit.info);

   emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1;

   if (unit == PIPE_SHADER_FRAGMENT)
      emit.imm_start += key->num_unnormalized_coords;

   if (unit == PIPE_SHADER_VERTEX) {
      emit.imm_start += key->vs.need_prescale ? 2 : 0;
   }

   emit.nr_hw_float_const =
      (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1);

   emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1;

   if (emit.nr_hw_temp >= SVGA3D_TEMPREG_MAX) {
      debug_printf("svga: too many temporary registers (%u)\n",
                   emit.nr_hw_temp);
      goto fail;
   }

   if (emit.info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      debug_printf(
         "svga: indirect indexing of temporary registers is not supported.\n");
      goto fail;
   }

   emit.in_main_func = TRUE;

   if (!svga_shader_emit_header(&emit)) {
      debug_printf("svga: emit header failed\n");
      goto fail;
   }

   if (!svga_shader_emit_instructions(&emit, shader->tokens)) {
      debug_printf("svga: emit instructions failed\n");
      goto fail;
   }

   variant = svga_new_shader_variant(svga);
   if (!variant)
      goto fail;

   variant->shader = shader;
   variant->tokens = (const unsigned *) emit.buf;
   variant->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned);
   memcpy(&variant->key, key, sizeof(*key));
   variant->id = UTIL_BITMASK_INVALID_INDEX;
   variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;

   /* If there was exactly one write to a fragment shader output register
    * and it came from a constant buffer, we know all fragments will have
    * the same color (except for blending).
    */
   variant->constant_color_output =
      emit.constant_color_output && emit.num_output_writes == 1;

#if 0
   if (!svga_shader_verify(variant->tokens, variant->nr_tokens) ||
       SVGA_DEBUG & DEBUG_TGSI) {
      debug_printf("#####################################\n");
      debug_printf("Shader %u below\n", shader->id);
      tgsi_dump(shader->tokens, 0);
      if (SVGA_DEBUG & DEBUG_TGSI) {
         debug_printf("Shader %u compiled below\n", shader->id);
         svga_shader_dump(variant->tokens, variant->nr_tokens, FALSE);
      }
      debug_printf("#####################################\n");
   }
#endif

   goto done;

fail:
   FREE(variant);
   if (emit.buf != err_buf)
      FREE(emit.buf);
   variant = NULL;

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return variant;
}
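/*
 * Illustration (not part of the driver): the doc comment above mentions that
 * the shader is serialized into an in-memory buffer that can be dynamically
 * grown.  Below is a simplified, stand-alone sketch of such a grow-on-demand
 * emit buffer; the struct and function names are made up and do not mirror
 * the real svga_shader_emitter API.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct emit_buf {
   uint8_t *buf;   /* start of the serialized shader */
   uint8_t *ptr;   /* current write position */
   size_t size;    /* allocated size in bytes */
};

/* Make sure at least `nbytes` more bytes can be written, growing the
 * allocation geometrically when needed. */
static bool
emit_reserve(struct emit_buf *e, size_t nbytes)
{
   size_t used = e->buf ? (size_t) (e->ptr - e->buf) : 0;

   if (used + nbytes > e->size) {
      size_t new_size = e->size ? e->size : 1024;
      uint8_t *new_buf;

      while (used + nbytes > new_size)
         new_size *= 2;

      new_buf = realloc(e->buf, new_size);
      if (!new_buf)
         return false;

      e->buf = new_buf;
      e->ptr = new_buf + used;   /* re-anchor the write pointer */
      e->size = new_size;
   }
   return true;
}

/* Append one 32-bit token to the buffer. */
static bool
emit_dword(struct emit_buf *e, uint32_t token)
{
   if (!emit_reserve(e, sizeof(token)))
      return false;
   memcpy(e->ptr, &token, sizeof(token));
   e->ptr += sizeof(token);
   return true;
}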
/**
 * Create a buffer transfer.
 *
 * Unlike texture DMAs (which are written immediately to the command buffer and
 * therefore inherently serialized with other context operations), for buffers
 * we try to coalesce multiple range mappings (i.e., multiple calls to this
 * function) into a single DMA command, for better efficiency in command
 * processing.  This means we need to exercise extra care here to ensure that
 * the end result is exactly the same as if one DMA was used for every mapped
 * range.
 */
static void *
svga_buffer_transfer_map(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level,
                         unsigned usage,
                         const struct pipe_box *box,
                         struct pipe_transfer **ptransfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_buffer *sbuf = svga_buffer(resource);
   struct pipe_transfer *transfer;
   uint8_t *map = NULL;
   int64_t begin = svga_get_time(svga);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERMAP);

   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);

   transfer = MALLOC_STRUCT(pipe_transfer);
   if (!transfer) {
      goto done;
   }

   transfer->resource = resource;
   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;
   transfer->stride = 0;
   transfer->layer_stride = 0;

   if (usage & PIPE_TRANSFER_WRITE) {
      /* If we write to the buffer for any reason, free any saved translated
       * vertices.
       */
      pipe_resource_reference(&sbuf->translated_indices.buffer, NULL);
   }

   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
      enum pipe_error ret;

      /* Host-side buffers can only be dirtied with vgpu10 features
       * (streamout and buffer copy).
       */
      assert(svga_have_vgpu10(svga));

      if (!sbuf->user) {
         (void) svga_buffer_handle(svga, resource, sbuf->bind_flags);
      }

      if (sbuf->dma.pending) {
         svga_buffer_upload_flush(svga, sbuf);
         svga_context_finish(svga);
      }

      assert(sbuf->handle);

      ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
      if (ret != PIPE_OK) {
         svga_context_flush(svga, NULL);
         ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
         assert(ret == PIPE_OK);
      }

      svga->hud.num_readbacks++;

      svga_context_finish(svga);

      sbuf->dirty = FALSE;
   }

   if (usage & PIPE_TRANSFER_WRITE) {
      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /*
          * Flush any pending primitives, finish writing any pending DMA
          * commands, and tell the host to discard the buffer contents on
          * the next DMA operation.
          */
         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            /*
             * Instead of flushing the context command buffer, simply discard
             * the current hwbuf, and start a new one.
             * With GB objects, the map operation takes care of this
             * if passed the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag,
             * and the old backing store is busy.
             */
            if (!svga_have_gb_objects(svga))
               svga_buffer_destroy_hw_storage(ss, sbuf);
         }

         sbuf->map.num_ranges = 0;
         sbuf->dma.flags.discard = TRUE;
      }

      if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         if (!sbuf->map.num_ranges) {
            /*
             * No pending ranges to upload so far, so we can tell the host to
             * not synchronize on the next DMA command.
             */
            sbuf->dma.flags.unsynchronized = TRUE;
         }
      }
      else {
         /*
          * Synchronizing, so flush any pending primitives, finish writing any
          * pending DMA command, and ensure the next DMA will be done in order.
          */
         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            if (svga_buffer_has_hw_storage(sbuf)) {
               /*
                * We have a pending DMA upload from a hardware buffer, therefore
                * we need to ensure that the host finishes processing that DMA
                * command before the state tracker can start overwriting the
                * hardware buffer.
                *
                * XXX: This could be avoided by tying the hardware buffer to
                * the transfer (just as done with textures), which would allow
                * overlapping DMA commands to be queued on the same context
                * buffer.  However, due to the likelihood of software vertex
                * processing, it is more convenient to hold on to the hardware
                * buffer, allowing to quickly access the contents from the CPU
                * without having to do a DMA download from the host.
                */
               if (usage & PIPE_TRANSFER_DONTBLOCK) {
                  /*
                   * Flushing the command buffer here will most likely cause
                   * the map of the hwbuf below to block, so preemptively
                   * return NULL here if DONTBLOCK is set to prevent unnecessary
                   * command buffer flushes.
                   */
                  FREE(transfer);
                  goto done;
               }

               svga_context_flush(svga, NULL);
            }
         }

         sbuf->dma.flags.unsynchronized = FALSE;
      }
   }

   if (!sbuf->swbuf && !svga_buffer_has_hw_storage(sbuf)) {
      if (svga_buffer_create_hw_storage(ss, sbuf, sbuf->bind_flags) != PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
          * buffer instead.
          */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
                         "splitting DMA transfers\n",
                         __FUNCTION__,
                         (sbuf->b.b.width0 + 1023)/1024);
         }

         sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
         if (!sbuf->swbuf) {
            FREE(transfer);
            goto done;
         }
      }
   }

   if (sbuf->swbuf) {
      /* User/malloc buffer */
      map = sbuf->swbuf;
   }
   else if (svga_buffer_has_hw_storage(sbuf)) {
      boolean retry;

      map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      if (map == NULL && retry) {
         /*
          * At this point, svga_buffer_get_transfer() has already
          * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL
          * for this buffer.
          */
         svga_context_flush(svga, NULL);
         map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      }
   }
   else {
      map = NULL;
   }

   if (map) {
      ++sbuf->map.count;
      map += transfer->box.x;
      *ptransfer = transfer;
   }
   else {
      FREE(transfer);
   }

   svga->hud.map_buffer_time += (svga_get_time(svga) - begin);

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return map;
}
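/*
 * Illustration (not part of the driver): like the texture path, this function
 * sits behind pipe_context::transfer_map().  A hypothetical caller replacing
 * the full contents of a buffer would typically pass
 * PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE, which lets the code above discard the
 * old hardware buffer instead of synchronizing with pending DMA.  The buffer,
 * data, and size below are assumptions made for the example.
 */
#include <string.h>
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "util/u_box.h"

static void
overwrite_buffer(struct pipe_context *pipe, struct pipe_resource *buf,
                 const void *data, unsigned size)
{
   struct pipe_transfer *transfer = NULL;
   struct pipe_box box;
   void *map;

   u_box_1d(0, size, &box);   /* offset 0, `size` bytes */

   map = pipe->transfer_map(pipe, buf, 0 /* level */,
                            PIPE_TRANSFER_WRITE |
                            PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
                            &box, &transfer);
   if (!map)
      return;

   memcpy(map, data, size);
   pipe->transfer_unmap(pipe, transfer);
}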
static void
svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct svga_context *svga = svga_context(pipe);
   unsigned reduced_prim = u_reduced_prim(info->mode);
   unsigned count = info->count;
   enum pipe_error ret = 0;
   boolean needed_swtnl;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DRAWVBO);

   svga->hud.num_draw_calls++;  /* for SVGA_QUERY_NUM_DRAW_CALLS */

   if (u_reduced_prim(info->mode) == PIPE_PRIM_TRIANGLES &&
       svga->curr.rast->templ.cull_face == PIPE_FACE_FRONT_AND_BACK)
      goto done;

   /*
    * Mark currently bound target surfaces as dirty; it doesn't really matter
    * that this is done before drawing.
    *
    * TODO: If we ever normally return something other than true, we should
    * not mark the surfaces as dirty in that case.
    */
   svga_mark_surfaces_dirty(svga_context(pipe));

   if (svga->curr.reduced_prim != reduced_prim) {
      svga->curr.reduced_prim = reduced_prim;
      svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
   }

   if (need_fallback_prim_restart(svga, info)) {
      enum pipe_error r;
      r = util_draw_vbo_without_prim_restart(pipe, &svga->curr.ib, info);
      assert(r == PIPE_OK);
      (void) r;
      goto done;
   }

   if (!u_trim_pipe_prim(info->mode, &count))
      goto done;

   needed_swtnl = svga->state.sw.need_swtnl;

   svga_update_state_retry(svga, SVGA_STATE_NEED_SWTNL);

   if (svga->state.sw.need_swtnl) {
      svga->hud.num_fallbacks++;  /* for SVGA_QUERY_NUM_FALLBACKS */
      if (!needed_swtnl) {
         /*
          * We're switching from HW to SW TNL.  SW TNL will require mapping all
          * currently bound vertex buffers, some of which may already be
          * referenced in the current command buffer as a result of previous HW
          * TNL.  So flush now, to prevent the context from flushing while a
          * referred vertex buffer is mapped.
          */
         svga_context_flush(svga, NULL);
      }

      /* Avoid leaking the previous hwtnl bias to swtnl */
      svga_hwtnl_set_index_bias(svga->hwtnl, 0);
      ret = svga_swtnl_draw_vbo(svga, info);
   }
   else {
      if (info->indexed && svga->curr.ib.buffer) {
         unsigned offset;

         assert(svga->curr.ib.offset % svga->curr.ib.index_size == 0);
         offset = svga->curr.ib.offset / svga->curr.ib.index_size;

         ret = retry_draw_range_elements(svga,
                                         svga->curr.ib.buffer,
                                         svga->curr.ib.index_size,
                                         info->index_bias,
                                         info->min_index,
                                         info->max_index,
                                         info->mode,
                                         info->start + offset,
                                         count,
                                         info->start_instance,
                                         info->instance_count,
                                         TRUE);
      }
      else {
         ret = retry_draw_arrays(svga, info->mode, info->start, count,
                                 info->start_instance, info->instance_count,
                                 TRUE);
      }
   }

   /* XXX: Silence warnings, do something sensible here? */
   (void) ret;

   if (SVGA_DEBUG & DEBUG_FLUSH) {
      svga_hwtnl_flush_retry(svga);
      svga_context_flush(svga, NULL);
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
}
/**
 * Define a vgpu10 sampler state.
 */
static void
define_sampler_state_object(struct svga_context *svga,
                            struct svga_sampler_state *ss,
                            const struct pipe_sampler_state *ps)
{
   uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */
   boolean anisotropic;
   uint8 compare_func;
   SVGA3dFilter filter;
   SVGA3dRGBAFloat bcolor;
   unsigned try;
   float min_lod, max_lod;

   assert(svga_have_vgpu10(svga));

   anisotropic = ss->aniso_level > 1.0f;

   filter = translate_filter_mode(ps->min_mip_filter,
                                  ps->min_img_filter,
                                  ps->mag_img_filter,
                                  anisotropic,
                                  ss->compare_mode);

   compare_func = translate_comparison_func(ss->compare_func);

   COPY_4V(bcolor.value, ps->border_color.f);

   assert(ps->min_lod <= ps->max_lod);

   if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
      /* just use the base level image */
      min_lod = max_lod = 0.0f;
   }
   else {
      min_lod = ps->min_lod;
      max_lod = ps->max_lod;
   }

   /* If shadow comparisons are enabled, create two sampler states: one
    * with the given shadow compare mode, another with shadow comparison off.
    * We need the latter because in some cases, we have to do the shadow
    * compare in the shader.  So, we don't want to do it twice.
    */
   STATIC_ASSERT(PIPE_TEX_COMPARE_NONE == 0);
   STATIC_ASSERT(PIPE_TEX_COMPARE_R_TO_TEXTURE == 1);
   ss->id[1] = SVGA3D_INVALID_ID;

   unsigned i;
   for (i = 0; i <= ss->compare_mode; i++) {
      ss->id[i] = util_bitmask_add(svga->sampler_object_id_bm);

      /* Loop in case command buffer is full and we need to flush and retry */
      for (try = 0; try < 2; try++) {
         enum pipe_error ret =
            SVGA3D_vgpu10_DefineSamplerState(svga->swc,
                                             ss->id[i],
                                             filter,
                                             ss->addressu,
                                             ss->addressv,
                                             ss->addressw,
                                             ss->lod_bias, /* float */
                                             max_aniso,
                                             compare_func,
                                             bcolor,
                                             min_lod,      /* float */
                                             max_lod);     /* float */
         if (ret == PIPE_OK)
            break;
         svga_context_flush(svga, NULL);
      }

      /* turn off the shadow compare option for second iteration */
      filter &= ~SVGA3D_FILTER_COMPARE;
   }
}


static void *
svga_create_sampler_state(struct pipe_context *pipe,
                          const struct pipe_sampler_state *sampler)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_sampler_state *cso = CALLOC_STRUCT(svga_sampler_state);

   if (!cso)
      return NULL;

   cso->mipfilter = translate_mip_filter(sampler->min_mip_filter);
   cso->magfilter = translate_img_filter(sampler->mag_img_filter);
   cso->minfilter = translate_img_filter(sampler->min_img_filter);
   cso->aniso_level = MAX2(sampler->max_anisotropy, 1);
   if (sampler->max_anisotropy)
      cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC;
   cso->lod_bias = sampler->lod_bias;
   cso->addressu = translate_wrap_mode(sampler->wrap_s);
   cso->addressv = translate_wrap_mode(sampler->wrap_t);
   cso->addressw = translate_wrap_mode(sampler->wrap_r);
   cso->normalized_coords = sampler->normalized_coords;
   cso->compare_mode = sampler->compare_mode;
   cso->compare_func = sampler->compare_func;

   {
      uint32 r = float_to_ubyte(sampler->border_color.f[0]);
      uint32 g = float_to_ubyte(sampler->border_color.f[1]);
      uint32 b = float_to_ubyte(sampler->border_color.f[2]);
      uint32 a = float_to_ubyte(sampler->border_color.f[3]);
      cso->bordercolor = (a << 24) | (r << 16) | (g << 8) | b;
   }

   /* No SVGA3D support for:
    *    - min/max LOD clamping
    */
   cso->min_lod = 0;
   cso->view_min_lod = MAX2((int) (sampler->min_lod + 0.5), 0);
   cso->view_max_lod = MAX2((int) (sampler->max_lod + 0.5), 0);

   /* Use min_mipmap */
   if (svga->debug.use_min_mipmap) {
      if (cso->view_min_lod == cso->view_max_lod) {
         cso->min_lod = cso->view_min_lod;
         cso->view_min_lod = 0;
         cso->view_max_lod = 1000; /* Just a high number */
         cso->mipfilter = SVGA3D_TEX_FILTER_NONE;
      }
   }

   if (svga_have_vgpu10(svga)) {
      define_sampler_state_object(svga, cso, sampler);
   }

   SVGA_DBG(DEBUG_SAMPLERS,
            "New sampler: min %u, view(min %u, max %u) lod, mipfilter %s\n",
            cso->min_lod, cso->view_min_lod, cso->view_max_lod,
            cso->mipfilter == SVGA3D_TEX_FILTER_NONE
            ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");

   svga->hud.num_sampler_objects++;
   SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws,
                        SVGA_STATS_COUNT_SAMPLER);

   return cso;
}


static void
svga_bind_sampler_states(struct pipe_context *pipe,
                         enum pipe_shader_type shader,
                         unsigned start,
                         unsigned num,
                         void **samplers)
{
   struct svga_context *svga = svga_context(pipe);
   unsigned i;
   boolean any_change = FALSE;

   assert(shader < PIPE_SHADER_TYPES);
   assert(start + num <= PIPE_MAX_SAMPLERS);

   /* Pre-VGPU10 only supports FS textures */
   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
      return;

   for (i = 0; i < num; i++) {
      if (svga->curr.sampler[shader][start + i] != samplers[i])
         any_change = TRUE;
      svga->curr.sampler[shader][start + i] = samplers[i];
   }

   if (!any_change) {
      return;
   }

   /* find highest non-null sampler[] entry */
   {
      unsigned j = MAX2(svga->curr.num_samplers[shader], start + num);
      while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL)
         j--;
      svga->curr.num_samplers[shader] = j;
   }

   svga->dirty |= SVGA_NEW_SAMPLER;
}


static void
svga_delete_sampler_state(struct pipe_context *pipe, void *sampler)
{
   struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler;
   struct svga_context *svga = svga_context(pipe);

   if (svga_have_vgpu10(svga)) {
      unsigned i;
      for (i = 0; i < 2; i++) {
         enum pipe_error ret;

         if (ss->id[i] != SVGA3D_INVALID_ID) {
            svga_hwtnl_flush_retry(svga);

            ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]);
            if (ret != PIPE_OK) {
               svga_context_flush(svga, NULL);
               ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]);
            }
            util_bitmask_clear(svga->sampler_object_id_bm, ss->id[i]);
         }
      }
   }

   FREE(sampler);
   svga->hud.num_sampler_objects--;
}


static struct pipe_sampler_view *
svga_create_sampler_view(struct pipe_context *pipe,
                         struct pipe_resource *texture,
                         const struct pipe_sampler_view *templ)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view);

   if (!sv) {
      return NULL;
   }

   sv->base = *templ;
   sv->base.reference.count = 1;
   sv->base.texture = NULL;
   pipe_resource_reference(&sv->base.texture, texture);

   sv->base.context = pipe;
   sv->id = SVGA3D_INVALID_ID;

   svga->hud.num_samplerview_objects++;
   SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws,
                        SVGA_STATS_COUNT_SAMPLERVIEW);

   return &sv->base;
}


static void
svga_sampler_view_destroy(struct pipe_context *pipe,
                          struct pipe_sampler_view *view)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view);

   if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) {
      if (view->context != pipe) {
         /* The SVGA3D device will generate an error (and on Linux, cause
          * us to abort) if we try to destroy a shader resource view from
          * a context other than the one it was created with.  Skip the
          * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler
          * view for now.  This should only sometimes happen when a shared
          * texture is deleted.
          */
         _debug_printf("context mismatch in %s\n", __func__);
      }
      else {
         enum pipe_error ret;

         svga_hwtnl_flush_retry(svga); /* XXX is this needed? */

         ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
         if (ret != PIPE_OK) {
            svga_context_flush(svga, NULL);
            ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
         }
         util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
      }
   }

   pipe_resource_reference(&sv->base.texture, NULL);

   FREE(sv);
   svga->hud.num_samplerview_objects--;
}


static void
svga_set_sampler_views(struct pipe_context *pipe,
                       enum pipe_shader_type shader,
                       unsigned start,
                       unsigned num,
                       struct pipe_sampler_view **views)
{
   struct svga_context *svga = svga_context(pipe);
   unsigned flag_1d = 0;
   unsigned flag_srgb = 0;
   uint i;
   boolean any_change = FALSE;

   assert(shader < PIPE_SHADER_TYPES);
   assert(start + num <= ARRAY_SIZE(svga->curr.sampler_views[shader]));

   /* Pre-VGPU10 only supports FS textures */
   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
      return;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_SETSAMPLERVIEWS);

   /* This bit of code works around a quirk in the CSO module.
    * If start=num=0 it means all sampler views should be released.
    * Note that the CSO module treats sampler views for fragment shaders
    * differently than other shader types.
    */
   if (start == 0 && num == 0 && svga->curr.num_sampler_views[shader] > 0) {
      for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
         pipe_sampler_view_release(pipe,
                                   &svga->curr.sampler_views[shader][i]);
      }
      any_change = TRUE;
   }

   for (i = 0; i < num; i++) {
      enum pipe_texture_target target;

      if (svga->curr.sampler_views[shader][start + i] != views[i]) {
         /* Note: we're using pipe_sampler_view_release() here to work around
          * a possible crash when the old view belongs to another context that
          * was already destroyed.
          */
         pipe_sampler_view_release(pipe,
                                   &svga->curr.sampler_views[shader][start + i]);
         pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i],
                                     views[i]);
         any_change = TRUE;
      }

      if (!views[i])
         continue;

      if (util_format_is_srgb(views[i]->format))
         flag_srgb |= 1 << (start + i);

      target = views[i]->target;
      if (target == PIPE_TEXTURE_1D) {
         flag_1d |= 1 << (start + i);
      }
      else if (target == PIPE_TEXTURE_RECT) {
         /* If the size of the bound texture changes, we need to emit new
          * const buffer values.
          */
         svga->dirty |= SVGA_NEW_TEXTURE_CONSTS;
      }
      else if (target == PIPE_BUFFER) {
         /* If the size of the bound buffer changes, we need to emit new
          * const buffer values.
          */
         svga->dirty |= SVGA_NEW_TEXTURE_CONSTS;
      }
   }

   if (!any_change) {
      goto done;
   }

   /* find highest non-null sampler_views[] entry */
   {
      unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
      while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL)
         j--;
      svga->curr.num_sampler_views[shader] = j;
   }

   svga->dirty |= SVGA_NEW_TEXTURE_BINDING;

   if (flag_srgb != svga->curr.tex_flags.flag_srgb ||
       flag_1d != svga->curr.tex_flags.flag_1d) {
      svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
      svga->curr.tex_flags.flag_1d = flag_1d;
      svga->curr.tex_flags.flag_srgb = flag_srgb;
   }

   /* Check if any of the sampler view resources collide with the framebuffer
    * color buffers or depth stencil resource.  If so, set the
    * NEW_FRAME_BUFFER dirty bit so that emit_framebuffer can be invoked to
    * create a backed view for the conflicted surface view.
    */
   if (svga_check_sampler_framebuffer_resource_collision(svga, shader)) {
      svga->dirty |= SVGA_NEW_FRAME_BUFFER;
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
}


/**
 * Clean up sampler and sampler view state at context destruction time.
 */
void
svga_cleanup_sampler_state(struct svga_context *svga)
{
   enum pipe_shader_type shader;

   for (shader = 0; shader <= PIPE_SHADER_GEOMETRY; shader++) {
      unsigned i;

      for (i = 0; i < svga->state.hw_draw.num_sampler_views[shader]; i++) {
         pipe_sampler_view_release(&svga->pipe,
                                   &svga->state.hw_draw.sampler_views[shader][i]);
      }
   }

   /* free polygon stipple state */
   if (svga->polygon_stipple.sampler) {
      svga->pipe.delete_sampler_state(&svga->pipe,
                                      svga->polygon_stipple.sampler);
   }

   if (svga->polygon_stipple.sampler_view) {
      svga->pipe.sampler_view_destroy(&svga->pipe,
                                      &svga->polygon_stipple.sampler_view->base);
   }

   pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
}


void
svga_init_sampler_functions(struct svga_context *svga)
{
   svga->pipe.create_sampler_state = svga_create_sampler_state;
   svga->pipe.bind_sampler_states = svga_bind_sampler_states;
   svga->pipe.delete_sampler_state = svga_delete_sampler_state;
   svga->pipe.set_sampler_views = svga_set_sampler_views;
   svga->pipe.create_sampler_view = svga_create_sampler_view;
   svga->pipe.sampler_view_destroy = svga_sampler_view_destroy;
}