static void
svga_user_buffer_range(struct svga_context *svga,
                       unsigned start,
                       unsigned count,
                       unsigned instance_count)
{
   const struct pipe_vertex_element *ve = svga->curr.velems->velem;
   int i;

   /*
    * Release old uploaded range (if not done already) and
    * initialize new ranges.
    */
   for (i = 0; i < svga->curr.velems->count; i++) {
      struct pipe_vertex_buffer *vb =
         &svga->curr.vb[ve[i].vertex_buffer_index];

      if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
         struct svga_buffer *buffer = svga_buffer(vb->buffer);

         pipe_resource_reference(&buffer->uploaded.buffer, NULL);
         buffer->uploaded.start = ~0;
         buffer->uploaded.end = 0;
      }
   }

   for (i = 0; i < svga->curr.velems->count; i++) {
      struct pipe_vertex_buffer *vb =
         &svga->curr.vb[ve[i].vertex_buffer_index];

      if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
         struct svga_buffer *buffer = svga_buffer(vb->buffer);
         unsigned first, size;
         unsigned instance_div = ve[i].instance_divisor;
         unsigned elemSize = util_format_get_blocksize(ve[i].src_format);

         svga->dirty |= SVGA_NEW_VBUFFER;

         if (instance_div) {
            first = ve[i].src_offset;
            count = (instance_count + instance_div - 1) / instance_div;
            size = vb->stride * (count - 1) + elemSize;
         }
         else if (vb->stride) {
            first = vb->stride * start + ve[i].src_offset;
            size = vb->stride * (count - 1) + elemSize;
         }
         else {
            /* Only a single vertex!
             * Upload with the largest vertex size the hw supports,
             * if possible.
             */
            first = ve[i].src_offset;
            size = MIN2(16, vb->buffer->width0);
         }

         buffer->uploaded.start = MIN2(buffer->uploaded.start, first);
         buffer->uploaded.end = MAX2(buffer->uploaded.end, first + size);
      }
   }
}
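A minimal worked example of the uploaded-range math above for a strided, non-instanced element; all numbers and the helper name are assumed for illustration and are not driver code.

/* Illustrative sketch only -- assumed example values. */
static void
example_user_buffer_range(void)
{
   unsigned stride = 32;      /* bytes between consecutive vertices */
   unsigned src_offset = 8;   /* element offset within a vertex */
   unsigned elemSize = 12;    /* e.g. three floats */
   unsigned start = 4, count = 10;

   unsigned first = stride * start + src_offset;       /* 136 */
   unsigned size = stride * (count - 1) + elemSize;    /* 300 */

   /* Bytes [136, 436) of the user buffer need to be uploaded. */
   (void) first;
   (void) size;
}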
struct svga_winsys_surface *
svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer)
{
   struct svga_winsys_screen *sws = svga_winsys_screen(buffer->screen);
   struct svga_winsys_surface *vsurf = NULL;

   assert(svga_buffer(buffer)->key.cachable == 0);
   svga_buffer(buffer)->key.cachable = 0;
   sws->surface_reference(sws, &vsurf, svga_buffer(buffer)->handle);
   return vsurf;
}
static enum pipe_error
validate_constant_buffers(struct svga_context *svga)
{
   unsigned shader;

   assert(svga_have_vgpu10(svga));

   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
      enum pipe_error ret;
      struct svga_buffer *buffer;
      struct svga_winsys_surface *handle;
      unsigned enabled_constbufs;

      /* Rebind the default constant buffer if needed */
      if (svga->rebind.flags.constbufs) {
         buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]);
         if (buffer) {
            ret = svga->swc->resource_rebind(svga->swc,
                                             buffer->handle,
                                             NULL,
                                             SVGA_RELOC_READ);
            if (ret != PIPE_OK)
               return ret;
         }
      }

      /*
       * Reference other bound constant buffers to ensure pending updates are
       * noticed by the device.
       */
      enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u;
      while (enabled_constbufs) {
         unsigned i = u_bit_scan(&enabled_constbufs);
         buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer);
         if (buffer) {
            handle = svga_buffer_handle(svga, &buffer->b.b);

            if (svga->rebind.flags.constbufs) {
               ret = svga->swc->resource_rebind(svga->swc,
                                                handle,
                                                NULL,
                                                SVGA_RELOC_READ);
               if (ret != PIPE_OK)
                  return ret;
            }
         }
      }
   }
   svga->rebind.flags.constbufs = FALSE;

   return PIPE_OK;
}
/**
 * Map a range of a buffer.
 */
static void *
svga_buffer_transfer_map(struct pipe_context *pipe,
                         struct pipe_transfer *transfer)
{
   struct svga_buffer *sbuf = svga_buffer(transfer->resource);
   uint8_t *map;

   if (sbuf->swbuf) {
      /* User/malloc buffer */
      map = sbuf->swbuf;
   }
   else if (sbuf->hwbuf) {
      struct svga_screen *ss = svga_screen(pipe->screen);
      struct svga_winsys_screen *sws = ss->sws;
      map = sws->buffer_map(sws, sbuf->hwbuf, transfer->usage);
   }
   else {
      map = NULL;
   }

   if (map) {
      ++sbuf->map.count;
      map += transfer->box.x;
   }

   return map;
}
struct pipe_buffer *
svga_screen_buffer_wrap_surface(struct pipe_screen *screen,
                                enum SVGA3dSurfaceFormat format,
                                struct svga_winsys_surface *srf)
{
   struct pipe_buffer *buf;
   struct svga_buffer *sbuf;
   struct svga_winsys_screen *sws = svga_winsys_screen(screen);

   buf = svga_buffer_create(screen, 0, SVGA_BUFFER_USAGE_WRAPPED, 0);
   if (!buf)
      return NULL;

   sbuf = svga_buffer(buf);

   /*
    * We are not the creator of this surface and therefore we must not
    * cache it for reuse. Set the cacheable flag to zero in the key to
    * prevent this.
    */
   sbuf->key.format = format;
   sbuf->key.cachable = 0;
   sws->surface_reference(sws, &sbuf->handle, srf);

   return buf;
}
static void
svga_buffer_unmap(struct pipe_screen *screen,
                  struct pipe_buffer *buf)
{
   struct svga_screen *ss = svga_screen(screen);
   struct svga_winsys_screen *sws = ss->sws;
   struct svga_buffer *sbuf = svga_buffer( buf );

   pipe_mutex_lock(ss->swc_mutex);

   assert(sbuf->map.count);
   if (sbuf->map.count)
      --sbuf->map.count;

   if (sbuf->hwbuf)
      sws->buffer_unmap(sws, sbuf->hwbuf);

   if (sbuf->map.writing) {
      if (!sbuf->map.flush_explicit) {
         /* No mapped range was flushed -- flush the whole buffer */
         SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");

         svga_buffer_add_range(sbuf, 0, sbuf->base.size);
      }

      sbuf->map.writing = FALSE;
      sbuf->map.flush_explicit = FALSE;
   }

   pipe_mutex_unlock(ss->swc_mutex);
}
static void
svga_buffer_destroy( struct pipe_screen *screen,
                     struct pipe_resource *buf )
{
   struct svga_screen *ss = svga_screen(screen);
   struct svga_buffer *sbuf = svga_buffer( buf );

   assert(!p_atomic_read(&buf->reference.count));

   assert(!sbuf->dma.pending);

   if (sbuf->handle)
      svga_buffer_destroy_host_surface(ss, sbuf);

   if (sbuf->uploaded.buffer)
      pipe_resource_reference(&sbuf->uploaded.buffer, NULL);

   if (sbuf->hwbuf)
      svga_buffer_destroy_hw_storage(ss, sbuf);

   if (sbuf->swbuf && !sbuf->user)
      align_free(sbuf->swbuf);

   FREE(sbuf);
}
static void
svga_buffer_destroy(struct pipe_screen *screen,
                    struct pipe_resource *buf)
{
   struct svga_screen *ss = svga_screen(screen);
   struct svga_buffer *sbuf = svga_buffer(buf);

   assert(!p_atomic_read(&buf->reference.count));

   assert(!sbuf->dma.pending);

   if (sbuf->handle)
      svga_buffer_destroy_host_surface(ss, sbuf);

   if (sbuf->uploaded.buffer)
      pipe_resource_reference(&sbuf->uploaded.buffer, NULL);

   if (sbuf->hwbuf)
      svga_buffer_destroy_hw_storage(ss, sbuf);

   if (sbuf->swbuf && !sbuf->user)
      align_free(sbuf->swbuf);

   pipe_resource_reference(&sbuf->translated_indices.buffer, NULL);

   ss->hud.total_resource_bytes -= sbuf->size;
   assert(ss->hud.num_resources > 0);
   if (ss->hud.num_resources > 0)
      ss->hud.num_resources--;

   FREE(sbuf);
}
/** Get resource handle for a texture or buffer */
static inline struct svga_winsys_surface *
svga_resource_handle(struct pipe_resource *res)
{
   if (res->target == PIPE_BUFFER) {
      return svga_buffer(res)->handle;
   }
   else {
      return svga_texture(res)->handle;
   }
}
static int
svga_upload_user_buffers(struct svga_context *svga,
                         unsigned start,
                         unsigned count,
                         unsigned instance_count)
{
   const struct pipe_vertex_element *ve = svga->curr.velems->velem;
   unsigned i;
   int ret;

   svga_user_buffer_range(svga, start, count, instance_count);

   for (i = 0; i < svga->curr.velems->count; i++) {
      struct pipe_vertex_buffer *vb =
         &svga->curr.vb[ve[i].vertex_buffer_index];

      if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
         struct svga_buffer *buffer = svga_buffer(vb->buffer);

         /*
          * Check if already uploaded. Otherwise go ahead and upload.
          */
         if (buffer->uploaded.buffer)
            continue;

         ret = u_upload_buffer( svga->upload_vb,
                                0,
                                buffer->uploaded.start,
                                buffer->uploaded.end - buffer->uploaded.start,
                                &buffer->b.b,
                                &buffer->uploaded.offset,
                                &buffer->uploaded.buffer);
         if (ret)
            return ret;

         if (0)
            debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d"
                         " sz %d\n",
                         __FUNCTION__,
                         i,
                         buffer,
                         buffer->uploaded.buffer,
                         buffer->uploaded.offset,
                         buffer->uploaded.start,
                         buffer->uploaded.end - buffer->uploaded.start);

         vb->buffer_offset = buffer->uploaded.offset;
      }
   }

   return PIPE_OK;
}
static void *
svga_buffer_map_range( struct pipe_screen *screen,
                       struct pipe_buffer *buf,
                       unsigned offset,
                       unsigned length,
                       unsigned usage )
{
   struct svga_screen *ss = svga_screen(screen);
   struct svga_winsys_screen *sws = ss->sws;
   struct svga_buffer *sbuf = svga_buffer( buf );
   void *map;

   if (!sbuf->swbuf && !sbuf->hwbuf) {
      if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
          * buffer instead.
          */

         debug_printf("%s: failed to allocate %u KB of DMA, splitting DMA transfers\n",
                      __FUNCTION__,
                      (sbuf->base.size + 1023)/1024);

         sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment);
      }
   }

   if (sbuf->swbuf) {
      /* User/malloc buffer */
      map = sbuf->swbuf;
   }
   else if (sbuf->hwbuf) {
      map = sws->buffer_map(sws, sbuf->hwbuf, usage);
   }
   else {
      map = NULL;
   }

   if (map) {
      pipe_mutex_lock(ss->swc_mutex);

      ++sbuf->map.count;

      if (usage & PIPE_BUFFER_USAGE_CPU_WRITE) {
         assert(sbuf->map.count <= 1);
         sbuf->map.writing = TRUE;
         if (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT)
            sbuf->map.flush_explicit = TRUE;
      }

      pipe_mutex_unlock(ss->swc_mutex);
   }

   return map;
}
static enum pipe_error
emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
{
   enum pipe_error ret;
   unsigned dirty_constbufs;
   unsigned enabled_constbufs;

   /* Emit 0th constant buffer (with extra constants) */
   ret = emit_constbuf_vgpu10(svga, shader);
   if (ret != PIPE_OK) {
      return ret;
   }

   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;

   /* Emit other constant buffers (UBOs) */
   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;

   while (dirty_constbufs) {
      unsigned index = u_bit_scan(&dirty_constbufs);
      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
      struct svga_buffer *buffer =
         svga_buffer(svga->curr.constbufs[shader][index].buffer);
      struct svga_winsys_surface *handle;

      if (buffer) {
         handle = svga_buffer_handle(svga, &buffer->b.b);
         enabled_constbufs |= 1 << index;
      }
      else {
         handle = NULL;
         enabled_constbufs &= ~(1 << index);
         assert(offset == 0);
         assert(size == 0);
      }

      assert(size % 16 == 0);
      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                                  index,
                                                  svga_shader_type(shader),
                                                  handle,
                                                  offset,
                                                  size);
      if (ret != PIPE_OK)
         return ret;
   }

   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;

   svga->state.dirty_constbufs[shader] = 0;

   return ret;
}
static int
upload_user_buffers( struct svga_context *svga )
{
   enum pipe_error ret = PIPE_OK;
   int i;
   int nr;

   if (0)
      debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);

   nr = svga->curr.num_vertex_buffers;

   for (i = 0; i < nr; i++) {
      if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer)) {
         struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer);

         if (!buffer->uploaded.buffer) {
            ret = u_upload_buffer( svga->upload_vb,
                                   0,
                                   buffer->base.size,
                                   &buffer->base,
                                   &buffer->uploaded.offset,
                                   &buffer->uploaded.buffer );
            if (ret)
               return ret;

            if (0)
               debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n",
                            __FUNCTION__,
                            i,
                            buffer,
                            buffer->uploaded.buffer,
                            buffer->uploaded.offset,
                            buffer->base.size);
         }

         pipe_buffer_reference( &svga->curr.vb[i].buffer,
                                buffer->uploaded.buffer );

         svga->curr.vb[i].buffer_offset = buffer->uploaded.offset;
      }
   }

   if (0)
      debug_printf("%s: DONE\n", __FUNCTION__);

   return ret;
}
static void
svga_buffer_flush_mapped_range( struct pipe_screen *screen,
                                struct pipe_buffer *buf,
                                unsigned offset, unsigned length)
{
   struct svga_buffer *sbuf = svga_buffer( buf );
   struct svga_screen *ss = svga_screen(screen);

   pipe_mutex_lock(ss->swc_mutex);
   assert(sbuf->map.writing);
   if (sbuf->map.writing) {
      assert(sbuf->map.flush_explicit);
      svga_buffer_add_range(sbuf, offset, offset + length);
   }
   pipe_mutex_unlock(ss->swc_mutex);
}
void
svga_redefine_user_buffer(struct pipe_context *pipe,
                          struct pipe_resource *resource,
                          unsigned offset,
                          unsigned size)
{
   struct svga_buffer *sbuf = svga_buffer(resource);

   assert(sbuf->user);
   assert(!sbuf->dma.pending);
   assert(!sbuf->handle);
   assert(!sbuf->hwbuf);

   /* use the default action of simply resizing the user buffer's size */
   u_default_redefine_user_buffer(pipe, resource, offset, size);
}
static void
svga_buffer_transfer_flush_region( struct pipe_context *pipe,
                                   struct pipe_transfer *transfer,
                                   const struct pipe_box *box)
{
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_buffer *sbuf = svga_buffer(transfer->resource);

   unsigned offset = transfer->box.x + box->x;
   unsigned length = box->width;

   assert(transfer->usage & PIPE_TRANSFER_WRITE);
   assert(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT);

   pipe_mutex_lock(ss->swc_mutex);
   svga_buffer_add_range(sbuf, offset, offset + length);
   pipe_mutex_unlock(ss->swc_mutex);
}
static void
svga_buffer_transfer_unmap(struct pipe_context *pipe,
                           struct pipe_transfer *transfer)
{
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_context *svga = svga_context(pipe);
   struct svga_buffer *sbuf = svga_buffer(transfer->resource);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERUNMAP);

   mtx_lock(&ss->swc_mutex);

   assert(sbuf->map.count);
   if (sbuf->map.count) {
      --sbuf->map.count;
   }

   if (svga_buffer_has_hw_storage(sbuf)) {
      /* Note: we may wind up flushing here and unmapping other buffers
       * which leads to recursively locking ss->swc_mutex.
       */
      svga_buffer_hw_storage_unmap(svga, sbuf);
   }

   if (transfer->usage & PIPE_TRANSFER_WRITE) {
      if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
         /*
          * Mapped range not flushed explicitly, so flush the whole buffer,
          * and tell the host to discard the contents when processing the DMA
          * command.
          */

         SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");

         sbuf->dma.flags.discard = TRUE;

         svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0);
      }
   }

   mtx_unlock(&ss->swc_mutex);
   FREE(transfer);
   SVGA_STATS_TIME_POP(svga_sws(svga));
}
static void
svga_release_user_upl_buffers(struct svga_context *svga)
{
   unsigned i;
   unsigned nr;

   nr = svga->curr.num_vertex_buffers;

   for (i = 0; i < nr; ++i) {
      struct pipe_vertex_buffer *vb = &svga->curr.vb[i];

      if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
         struct svga_buffer *buffer = svga_buffer(vb->buffer);

         buffer->uploaded.start = ~0;
         buffer->uploaded.end = 0;
         if (buffer->uploaded.buffer)
            pipe_resource_reference(&buffer->uploaded.buffer, NULL);
      }
   }
}
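A small sketch of the empty-range sentinel used here and in svga_user_buffer_range (start = ~0, end = 0), assuming MIN2/MAX2 are the usual two-argument min/max macros seen elsewhere in this code; the first accumulation collapses the sentinel to the exact first interval.

/* Illustrative sketch only -- not driver code. */
static void
example_range_accumulate(void)
{
   unsigned start = ~0u, end = 0;   /* "empty" uploaded range */

   /* Accumulate two element ranges: [136, 436) and [8, 20). */
   start = MIN2(start, 136);  end = MAX2(end, 436);
   start = MIN2(start, 8);    end = MAX2(end, 20);

   /* Result: start == 8, end == 436, the bounding range of both. */
   (void) start;
   (void) end;
}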
static void
svga_buffer_transfer_unmap( struct pipe_context *pipe,
                            struct pipe_transfer *transfer )
{
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_context *svga = svga_context(pipe);
   struct svga_buffer *sbuf = svga_buffer(transfer->resource);

   pipe_mutex_lock(ss->swc_mutex);

   assert(sbuf->map.count);
   if (sbuf->map.count) {
      --sbuf->map.count;
   }

   if (svga_buffer_has_hw_storage(sbuf)) {
      svga_buffer_hw_storage_unmap(svga, sbuf);
   }

   if (transfer->usage & PIPE_TRANSFER_WRITE) {
      if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
         /*
          * Mapped range not flushed explicitly, so flush the whole buffer,
          * and tell the host to discard the contents when processing the DMA
          * command.
          */

         SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");

         sbuf->dma.flags.discard = TRUE;

         svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0);
      }
   }

   pipe_mutex_unlock(ss->swc_mutex);

   FREE(transfer);
}
/* Get (or create/upload) the winsys surface handle so that we can
 * refer to this buffer in fifo commands.
 */
struct svga_winsys_surface *
svga_buffer_handle(struct svga_context *svga,
                   struct pipe_resource *buf)
{
   struct pipe_screen *screen = svga->pipe.screen;
   struct svga_screen *ss = svga_screen(screen);
   struct svga_buffer *sbuf;
   enum pipe_error ret;

   if (!buf)
      return NULL;

   sbuf = svga_buffer(buf);

   assert(!sbuf->map.count);
   assert(!sbuf->user);

   if (!sbuf->handle) {
      ret = svga_buffer_create_host_surface(ss, sbuf);
      if (ret != PIPE_OK)
         return NULL;
   }

   assert(sbuf->handle);

   if (sbuf->map.num_ranges) {
      if (!sbuf->dma.pending) {
         /*
          * No pending DMA upload yet, so insert a DMA upload command now.
          */

         /*
          * Migrate the data from swbuf -> hwbuf if necessary.
          */
         ret = svga_buffer_update_hw(ss, sbuf);
         if (ret == PIPE_OK) {
            /*
             * Queue a dma command.
             */

            ret = svga_buffer_upload_command(svga, sbuf);
            if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
               svga_context_flush(svga, NULL);
               ret = svga_buffer_upload_command(svga, sbuf);
               assert(ret == PIPE_OK);
            }
            if (ret == PIPE_OK) {
               sbuf->dma.pending = TRUE;
               assert(!sbuf->head.prev && !sbuf->head.next);
               LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers);
            }
         }
         else if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
            /*
             * The buffer is too big to fit in the GMR aperture, so break it in
             * smaller pieces.
             */
            ret = svga_buffer_upload_piecewise(ss, svga, sbuf);
         }

         if (ret != PIPE_OK) {
            /*
             * Something unexpected happened above. There is very little that
             * we can do other than proceeding while ignoring the dirty ranges.
             */
            assert(0);
            sbuf->map.num_ranges = 0;
         }
      }
      else {
         /*
          * There is a pending DMA already. Make sure it is from this context.
          */
         assert(sbuf->dma.svga == svga);
      }
   }

   assert(!sbuf->map.num_ranges || sbuf->dma.pending);

   return sbuf->handle;
}
/**
 * Create a buffer transfer.
 *
 * Unlike texture DMAs (which are written immediately to the command buffer and
 * therefore inherently serialized with other context operations), for buffers
 * we try to coalesce multiple range mappings (i.e., multiple calls to this
 * function) into a single DMA command, for better efficiency in command
 * processing. This means we need to exercise extra care here to ensure that
 * the end result is exactly the same as if one DMA was used for every mapped
 * range.
 */
static struct pipe_transfer *
svga_buffer_get_transfer(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level,
                         unsigned usage,
                         const struct pipe_box *box)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_buffer *sbuf = svga_buffer(resource);
   struct pipe_transfer *transfer;

   transfer = CALLOC_STRUCT(pipe_transfer);
   if (transfer == NULL) {
      return NULL;
   }

   transfer->resource = resource;
   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;

   if (usage & PIPE_TRANSFER_WRITE) {
      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /*
          * Flush any pending primitives, finish writing any pending DMA
          * commands, and tell the host to discard the buffer contents on
          * the next DMA operation.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            /*
             * Instead of flushing the context command buffer, simply discard
             * the current hwbuf, and start a new one.
             */

            svga_buffer_destroy_hw_storage(ss, sbuf);
         }

         sbuf->map.num_ranges = 0;
         sbuf->dma.flags.discard = TRUE;
      }

      if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         if (!sbuf->map.num_ranges) {
            /*
             * No pending ranges to upload so far, so we can tell the host to
             * not synchronize on the next DMA command.
             */

            sbuf->dma.flags.unsynchronized = TRUE;
         }
      }
      else {
         /*
          * Synchronizing, so flush any pending primitives, finish writing any
          * pending DMA command, and ensure the next DMA will be done in order.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            if (sbuf->hwbuf) {
               /*
                * We have a pending DMA upload from a hardware buffer, therefore
                * we need to ensure that the host finishes processing that DMA
                * command before the state tracker can start overwriting the
                * hardware buffer.
                *
                * XXX: This could be avoided by tying the hardware buffer to
                * the transfer (just as done with textures), which would allow
                * overlapping DMAs commands to be queued on the same context
                * buffer. However, due to the likelihood of software vertex
                * processing, it is more convenient to hold on to the hardware
                * buffer, allowing to quickly access the contents from the CPU
                * without having to do a DMA download from the host.
                */

               if (usage & PIPE_TRANSFER_DONTBLOCK) {
                  /*
                   * Flushing the command buffer here will most likely cause
                   * the map of the hwbuf below to block, so preemptively
                   * return NULL here if DONTBLOCK is set to prevent unnecessary
                   * command buffer flushes.
                   */

                  FREE(transfer);
                  return NULL;
               }

               svga_context_flush(svga, NULL);
            }
         }

         sbuf->dma.flags.unsynchronized = FALSE;
      }
   }

   if (!sbuf->swbuf && !sbuf->hwbuf) {
      if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
          * buffer instead.
          */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
                         "splitting DMA transfers\n",
                         __FUNCTION__,
                         (sbuf->b.b.width0 + 1023)/1024);
         }

         sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
         if (!sbuf->swbuf) {
            FREE(transfer);
            return NULL;
         }
      }
   }

   return transfer;
}
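The coalescing described in the doc comment above depends on remembering each written range and merging overlaps so that a single later DMA can cover them. The sketch below only illustrates that idea under stated assumptions: the struct, the fixed-size array policy, and the name example_add_range are hypothetical and are not the driver's svga_buffer_add_range.

/* Illustrative sketch only -- assumed merge policy, not driver code. */
struct example_range {
   unsigned start, end;
};

static void
example_add_range(struct example_range *ranges, unsigned *nr_ranges,
                  unsigned max_ranges, unsigned start, unsigned end)
{
   unsigned i;

   /* Merge into an existing range when the new one overlaps or touches it. */
   for (i = 0; i < *nr_ranges; i++) {
      if (start <= ranges[i].end && end >= ranges[i].start) {
         ranges[i].start = MIN2(ranges[i].start, start);
         ranges[i].end = MAX2(ranges[i].end, end);
         return;
      }
   }

   if (*nr_ranges < max_ranges) {
      /* Track the new range separately. */
      ranges[*nr_ranges].start = start;
      ranges[*nr_ranges].end = end;
      (*nr_ranges)++;
   }
   else {
      /* Out of slots: grow the last range to cover the new bytes. */
      ranges[max_ranges - 1].start = MIN2(ranges[max_ranges - 1].start, start);
      ranges[max_ranges - 1].end = MAX2(ranges[max_ranges - 1].end, end);
   }
}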
static enum pipe_error
draw_vgpu10(struct svga_hwtnl *hwtnl,
            const SVGA3dPrimitiveRange *range,
            unsigned vcount,
            unsigned min_index,
            unsigned max_index,
            struct pipe_resource *ib,
            unsigned start_instance, unsigned instance_count)
{
   struct svga_context *svga = hwtnl->svga;
   struct pipe_resource *vbuffers[SVGA3D_INPUTREG_MAX];
   struct svga_winsys_surface *vbuffer_handles[SVGA3D_INPUTREG_MAX];
   struct svga_winsys_surface *ib_handle;
   const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
   int last_vbuf = -1;
   enum pipe_error ret;
   unsigned i;

   assert(svga_have_vgpu10(svga));
   assert(hwtnl->cmd.prim_count == 0);

   /* We need to reemit all the current resource bindings along with the Draw
    * command to be sure that the referenced resources are available for the
    * Draw command, just in case the surfaces associated with the resources
    * are paged out.
    */
   if (svga->rebind.val) {
      ret = svga_rebind_framebuffer_bindings(svga);
      if (ret != PIPE_OK)
         return ret;

      ret = svga_rebind_shaders(svga);
      if (ret != PIPE_OK)
         return ret;

      /* Rebind stream output targets */
      ret = svga_rebind_stream_output_targets(svga);
      if (ret != PIPE_OK)
         return ret;

      /* No need to explicitly rebind index buffer and vertex buffers here.
       * Even if the same index buffer or vertex buffers are referenced for this
       * draw and we skip emitting the redundant set command, we will still
       * reference the associated resources.
       */
   }

   ret = validate_sampler_resources(svga);
   if (ret != PIPE_OK)
      return ret;

   ret = validate_constant_buffers(svga);
   if (ret != PIPE_OK)
      return ret;

   /* Get handle for each referenced vertex buffer */
   for (i = 0; i < vbuf_count; i++) {
      struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer);

      if (sbuf) {
         assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
         vbuffer_handles[i] = svga_buffer_handle(svga, &sbuf->b.b);
         if (vbuffer_handles[i] == NULL)
            return PIPE_ERROR_OUT_OF_MEMORY;
         vbuffers[i] = &sbuf->b.b;
         last_vbuf = i;
      }
      else {
         vbuffers[i] = NULL;
         vbuffer_handles[i] = NULL;
      }
   }

   for (; i < svga->state.hw_draw.num_vbuffers; i++) {
      vbuffers[i] = NULL;
      vbuffer_handles[i] = NULL;
   }

   /* Get handle for the index buffer */
   if (ib) {
      struct svga_buffer *sbuf = svga_buffer(ib);

      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
      (void) sbuf; /* silence unused var warning */

      ib_handle = svga_buffer_handle(svga, ib);
      if (!ib_handle)
         return PIPE_ERROR_OUT_OF_MEMORY;
   }
   else {
      ib_handle = NULL;
   }

   /* setup vertex attribute input layout */
   if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
      ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
                                         hwtnl->cmd.vdecl_layout_id);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
   }

   /* setup vertex buffers */
   {
      SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS];

      for (i = 0; i < vbuf_count; i++) {
         vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride;
         vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
         vbuffer_attrs[i].sid = 0;
      }

      /* If we haven't yet emitted a drawing command or if any
       * vertex buffer state is changing, issue that state now.
       */
      if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) == 0) ||
          vbuf_count != svga->state.hw_draw.num_vbuffers ||
          memcmp(vbuffer_attrs, svga->state.hw_draw.vbuffer_attrs,
                 vbuf_count * sizeof(vbuffer_attrs[0])) ||
          memcmp(vbuffers, svga->state.hw_draw.vbuffers,
                 vbuf_count * sizeof(vbuffers[0]))) {
         unsigned num_vbuffers;

         /* get the max of the current bound vertex buffers count and
          * the to-be-bound vertex buffers count, so as to unbind
          * the unused vertex buffers.
          */
         num_vbuffers = MAX2(vbuf_count, svga->state.hw_draw.num_vbuffers);

         if (num_vbuffers > 0) {

            ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, num_vbuffers,
                                                 0,    /* startBuffer */
                                                 vbuffer_attrs,
                                                 vbuffer_handles);
            if (ret != PIPE_OK)
               return ret;

            /* save the number of vertex buffers sent to the device, not
             * including trailing unbound vertex buffers.
             */
            svga->state.hw_draw.num_vbuffers = last_vbuf + 1;
            memcpy(svga->state.hw_draw.vbuffer_attrs, vbuffer_attrs,
                   num_vbuffers * sizeof(vbuffer_attrs[0]));
            for (i = 0; i < num_vbuffers; i++) {
               pipe_resource_reference(&svga->state.hw_draw.vbuffers[i],
                                       vbuffers[i]);
            }
         }
      }
      else {
         /* Even though we can avoid emitting the redundant SetVertexBuffers
          * command, we still need to reference the vertex buffers surfaces.
          */
         for (i = 0; i < vbuf_count; i++) {
            if (vbuffer_handles[i] && !last_command_was_draw(svga)) {
               ret = svga->swc->resource_rebind(svga->swc, vbuffer_handles[i],
                                                NULL, SVGA_RELOC_READ);
               if (ret != PIPE_OK)
                  return ret;
            }
         }
      }
   }

   /* Set primitive type (line, tri, etc) */
   if (svga->state.hw_draw.topology != range->primType) {
      ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.topology = range->primType;
   }

   if (ib_handle) {
      /* indexed drawing */
      SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);

      /* setup index buffer */
      if (ib != svga->state.hw_draw.ib ||
          indexFormat != svga->state.hw_draw.ib_format ||
          range->indexArray.offset != svga->state.hw_draw.ib_offset) {

         assert(indexFormat != SVGA3D_FORMAT_INVALID);
         ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
                                            indexFormat,
                                            range->indexArray.offset);
         if (ret != PIPE_OK)
            return ret;

         pipe_resource_reference(&svga->state.hw_draw.ib, ib);
         svga->state.hw_draw.ib_format = indexFormat;
         svga->state.hw_draw.ib_offset = range->indexArray.offset;
      }
      else {
         /* Even though we can avoid emitting the redundant SetIndexBuffer
          * command, we still need to reference the index buffer surface.
          */
         if (!last_command_was_draw(svga)) {
            ret = svga->swc->resource_rebind(svga->swc, ib_handle,
                                             NULL, SVGA_RELOC_READ);
            if (ret != PIPE_OK)
               return ret;
         }
      }

      if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
                                                  vcount,
                                                  instance_count,
                                                  0, /* startIndexLocation */
                                                  range->indexBias,
                                                  start_instance);
         if (ret != PIPE_OK)
            return ret;
      }
      else {
         /* non-instanced drawing */
         ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
                                         vcount,
                                         0, /* startIndexLocation */
                                         range->indexBias);
         if (ret != PIPE_OK)
            return ret;
      }
   }
   else {
      /* non-indexed drawing */
      if (svga->state.hw_draw.ib_format != SVGA3D_FORMAT_INVALID ||
          svga->state.hw_draw.ib != NULL) {
         /* Unbind previously bound index buffer */
         ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, NULL,
                                            SVGA3D_FORMAT_INVALID, 0);
         if (ret != PIPE_OK)
            return ret;
         pipe_resource_reference(&svga->state.hw_draw.ib, NULL);
         svga->state.hw_draw.ib_format = SVGA3D_FORMAT_INVALID;
      }

      assert(svga->state.hw_draw.ib == NULL);

      if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
                                           vcount,
                                           instance_count,
                                           range->indexBias,
                                           start_instance);
         if (ret != PIPE_OK)
            return ret;
      }
      else {
         /* non-instanced */
         ret = SVGA3D_vgpu10_Draw(svga->swc,
                                  vcount,
                                  range->indexBias);
         if (ret != PIPE_OK)
            return ret;
      }
   }

   hwtnl->cmd.prim_count = 0;

   return PIPE_OK;
}
static enum pipe_error
emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
{
   enum pipe_error ret;
   unsigned dirty_constbufs;
   unsigned enabled_constbufs;

   /* Emit 0th constant buffer (with extra constants) */
   ret = emit_constbuf_vgpu10(svga, shader);
   if (ret != PIPE_OK) {
      return ret;
   }

   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;

   /* Emit other constant buffers (UBOs) */
   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;

   while (dirty_constbufs) {
      unsigned index = u_bit_scan(&dirty_constbufs);
      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
      struct svga_buffer *buffer =
         svga_buffer(svga->curr.constbufs[shader][index].buffer);
      struct svga_winsys_surface *handle;

      if (buffer) {
         handle = svga_buffer_handle(svga, &buffer->b.b);
         enabled_constbufs |= 1 << index;
      }
      else {
         handle = NULL;
         enabled_constbufs &= ~(1 << index);
         assert(offset == 0);
         assert(size == 0);
      }

      if (size % 16 != 0) {
         /* GL's buffer range sizes can be any number of bytes but the
          * SVGA3D device requires a multiple of 16 bytes.
          */
         const unsigned total_size = buffer->b.b.width0;

         if (offset + align(size, 16) <= total_size) {
            /* round up size to multiple of 16 */
            size = align(size, 16);
         }
         else {
            /* round down to multiple of 16 (this may cause rendering problems
             * but should avoid a device error).
             */
            size &= ~15;
         }
      }
      assert(size % 16 == 0);

      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                                  index,
                                                  svga_shader_type(shader),
                                                  handle,
                                                  offset,
                                                  size);
      if (ret != PIPE_OK)
         return ret;

      svga->hud.num_const_buf_updates++;
   }

   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;

   svga->state.dirty_constbufs[shader] = 0;

   return ret;
}
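A compact restatement of the 16-byte rounding decision above, with assumed numbers and a hypothetical helper name: round the bound size up when the buffer has room past the range, otherwise round down so the binding stays within the buffer.

/* Illustrative sketch only -- not driver code. */
static unsigned
example_constbuf_size(unsigned offset, unsigned size, unsigned total_size)
{
   if (size % 16 != 0) {
      const unsigned rounded_up = (size + 15u) & ~15u;

      if (offset + rounded_up <= total_size)
         size = rounded_up;   /* e.g. offset 0, size 20, total 64 -> 32 */
      else
         size &= ~15u;        /* e.g. offset 48, size 20, total 64 -> 16 */
   }
   return size;
}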
static enum pipe_error
emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
{
   const struct pipe_vertex_element *ve = svga->curr.velems->velem;
   SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX];
   unsigned buffer_indexes[SVGA3D_INPUTREG_MAX];
   unsigned i;
   unsigned neg_bias = 0;

   assert(svga->curr.velems->count >=
          svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);

   /**
    * We can't set the VDECL offset to something negative, so we
    * must calculate a common negative additional index bias, and modify
    * the VDECL offsets accordingly so they *all* end up positive.
    *
    * Note that the exact value of the negative index bias is not that
    * important, since we compensate for it when we calculate the vertex
    * buffer offset below. The important thing is that all vertex buffer
    * offsets remain positive.
    *
    * Note that we use a negative bias variable in order to make the
    * rounding maths easier to follow, and to avoid int / unsigned
    * confusion.
    */
   for (i = 0; i < svga->curr.velems->count; i++) {
      const struct pipe_vertex_buffer *vb =
         &svga->curr.vb[ve[i].vertex_buffer_index];
      struct svga_buffer *buffer;
      unsigned int offset = vb->buffer_offset + ve[i].src_offset;
      unsigned tmp_neg_bias = 0;

      if (!vb->buffer)
         continue;

      buffer = svga_buffer(vb->buffer);
      if (buffer->uploaded.start > offset) {
         tmp_neg_bias = buffer->uploaded.start - offset;
         if (vb->stride)
            tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
         neg_bias = MAX2(neg_bias, tmp_neg_bias);
      }
   }

   for (i = 0; i < svga->curr.velems->count; i++) {
      const struct pipe_vertex_buffer *vb =
         &svga->curr.vb[ve[i].vertex_buffer_index];
      unsigned usage, index;
      struct svga_buffer *buffer;

      if (!vb->buffer)
         continue;

      buffer = svga_buffer(vb->buffer);
      svga_generate_vdecl_semantics( i, &usage, &index );

      /* SVGA_NEW_VELEMENT */
      decls[i].identity.type = svga->curr.velems->decl_type[i];
      decls[i].identity.method = SVGA3D_DECLMETHOD_DEFAULT;
      decls[i].identity.usage = usage;
      decls[i].identity.usageIndex = index;
      decls[i].array.stride = vb->stride;

      /* Compensate for partially uploaded vbo, and
       * for the negative index bias.
       */
      decls[i].array.offset = (vb->buffer_offset
                               + ve[i].src_offset
                               + neg_bias * vb->stride
                               - buffer->uploaded.start);

      assert(decls[i].array.offset >= 0);

      buffer_indexes[i] = ve[i].vertex_buffer_index;

      assert(!buffer->uploaded.buffer);
   }

   svga_hwtnl_vertex_decls(svga->hwtnl,
                           svga->curr.velems->count,
                           decls,
                           buffer_indexes,
                           svga->curr.velems->id);

   svga_hwtnl_vertex_buffers(svga->hwtnl,
                             svga->curr.num_vertex_buffers,
                             svga->curr.vb);

   svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias );

   return PIPE_OK;
}
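A worked numeric example (assumed values, hypothetical helper) of the negative index bias above: when only part of a user buffer was uploaded, the shortfall is rounded up to whole vertices so the resulting VDECL offset stays non-negative, and the hardware index bias compensates.

/* Illustrative sketch only -- not driver code. */
static void
example_neg_bias(void)
{
   unsigned offset = 0;            /* vb->buffer_offset + src_offset */
   unsigned uploaded_start = 96;   /* first uploaded byte */
   unsigned stride = 32;

   /* Short by 96 bytes -> bias of ceil(96 / 32) = 3 vertices. */
   unsigned neg_bias = (uploaded_start - offset + stride - 1) / stride;

   /* VDECL offset: 0 + 3 * 32 - 96 = 0 (non-negative), and the index
    * bias passed to the hardware is -3 to compensate.
    */
   (void) neg_bias;
}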
static void
svga_set_stream_output_targets(struct pipe_context *pipe,
                               unsigned num_targets,
                               struct pipe_stream_output_target **targets,
                               const unsigned *offsets)
{
   struct svga_context *svga = svga_context(pipe);
   struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
   enum pipe_error ret;
   unsigned i;
   unsigned num_so_targets;

   SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,
            num_targets);

   assert(svga_have_vgpu10(svga));

   /* Mark the streamout buffers as dirty so that we'll issue readbacks
    * before mapping.
    */
   for (i = 0; i < svga->num_so_targets; i++) {
      struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);
      sbuf->dirty = TRUE;
   }

   assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);

   for (i = 0; i < num_targets; i++) {
      struct svga_stream_output_target *sot
         = svga_stream_output_target(targets[i]);
      struct svga_buffer *sbuf = svga_buffer(sot->base.buffer);
      unsigned size;

      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
      (void) sbuf;

      svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer);
      svga->so_targets[i] = &sot->base;
      soBindings[i].offset = sot->base.buffer_offset;

      /* The size cannot extend beyond the end of the buffer.  Clamp it. */
      size = MIN2(sot->base.buffer_size,
                  sot->base.buffer->width0 - sot->base.buffer_offset);

      soBindings[i].sizeInBytes = size;
   }

   /* unbind any previously bound stream output buffers */
   for (; i < svga->num_so_targets; i++) {
      svga->so_surfaces[i] = NULL;
      svga->so_targets[i] = NULL;
   }

   num_so_targets = MAX2(svga->num_so_targets, num_targets);
   ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
                                    soBindings, svga->so_surfaces);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
                                       soBindings, svga->so_surfaces);
   }

   svga->num_so_targets = num_targets;
}
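A tiny sketch (assumed numbers, hypothetical helper) of the clamp above, which keeps a stream-output binding from extending past the end of its backing buffer.

/* Illustrative sketch only -- not driver code. */
static unsigned
example_so_binding_size(unsigned buffer_size, unsigned width0,
                        unsigned buffer_offset)
{
   /* e.g. buffer_size 4096, width0 4096, offset 1024 -> 3072 bytes */
   return MIN2(buffer_size, width0 - buffer_offset);
}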
static enum pipe_error
draw_vgpu10(struct svga_hwtnl *hwtnl,
            const SVGA3dPrimitiveRange *range,
            unsigned vcount,
            unsigned min_index,
            unsigned max_index,
            struct pipe_resource *ib,
            unsigned start_instance, unsigned instance_count)
{
   struct svga_context *svga = hwtnl->svga;
   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
   struct svga_winsys_surface *ib_handle;
   const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
   enum pipe_error ret;
   unsigned i;

   assert(svga_have_vgpu10(svga));
   assert(hwtnl->cmd.prim_count == 0);

   /* We need to reemit all the current resource bindings along with the Draw
    * command to be sure that the referenced resources are available for the
    * Draw command, just in case the surfaces associated with the resources
    * are paged out.
    */
   if (svga->rebind.val) {
      ret = svga_rebind_framebuffer_bindings(svga);
      if (ret != PIPE_OK)
         return ret;

      ret = svga_rebind_shaders(svga);
      if (ret != PIPE_OK)
         return ret;
   }

   ret = validate_sampler_resources(svga);
   if (ret != PIPE_OK)
      return ret;

   ret = validate_constant_buffers(svga);
   if (ret != PIPE_OK)
      return ret;

   /* Get handle for each referenced vertex buffer */
   for (i = 0; i < vbuf_count; i++) {
      struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer);

      if (sbuf) {
         assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
         vb_handle[i] = svga_buffer_handle(svga, &sbuf->b.b);
         if (vb_handle[i] == NULL)
            return PIPE_ERROR_OUT_OF_MEMORY;
      }
      else {
         vb_handle[i] = NULL;
      }
   }

   /* Get handles for the index buffers */
   if (ib) {
      struct svga_buffer *sbuf = svga_buffer(ib);

      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
      (void) sbuf; /* silence unused var warning */

      ib_handle = svga_buffer_handle(svga, ib);
      if (ib_handle == NULL)
         return PIPE_ERROR_OUT_OF_MEMORY;
   }
   else {
      ib_handle = NULL;
   }

   /* setup vertex attribute input layout */
   if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
      ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
                                         hwtnl->cmd.vdecl_layout_id);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
   }

   /* setup vertex buffers */
   {
      SVGA3dVertexBuffer buffers[PIPE_MAX_ATTRIBS];

      for (i = 0; i < vbuf_count; i++) {
         buffers[i].stride = hwtnl->cmd.vbufs[i].stride;
         buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
      }
      if (vbuf_count > 0) {
         ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
                                              0,    /* startBuffer */
                                              buffers, vb_handle);
         if (ret != PIPE_OK)
            return ret;
      }
   }

   /* Set primitive type (line, tri, etc) */
   if (svga->state.hw_draw.topology != range->primType) {
      ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.topology = range->primType;
   }

   if (ib_handle) {
      /* indexed drawing */
      SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);

      /* setup index buffer */
      ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
                                         indexFormat,
                                         range->indexArray.offset);
      if (ret != PIPE_OK)
         return ret;

      if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
                                                  vcount,
                                                  instance_count,
                                                  0, /* startIndexLocation */
                                                  range->indexBias,
                                                  start_instance);
         if (ret != PIPE_OK)
            return ret;
      }
      else {
         /* non-instanced drawing */
         ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
                                         vcount,
                                         0, /* startIndexLocation */
                                         range->indexBias);
         if (ret != PIPE_OK)
            return ret;
      }
   }
   else {
      /* non-indexed drawing */
      if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
                                           vcount,
                                           instance_count,
                                           range->indexBias,
                                           start_instance);
         if (ret != PIPE_OK)
            return ret;
      }
      else {
         /* non-instanced */
         ret = SVGA3D_vgpu10_Draw(svga->swc, vcount, range->indexBias);
         if (ret != PIPE_OK)
            return ret;
      }
   }

   hwtnl->cmd.prim_count = 0;

   return PIPE_OK;
}
/**
 * Create a buffer transfer.
 *
 * Unlike texture DMAs (which are written immediately to the command buffer and
 * therefore inherently serialized with other context operations), for buffers
 * we try to coalesce multiple range mappings (i.e., multiple calls to this
 * function) into a single DMA command, for better efficiency in command
 * processing. This means we need to exercise extra care here to ensure that
 * the end result is exactly the same as if one DMA was used for every mapped
 * range.
 */
static void *
svga_buffer_transfer_map(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level,
                         unsigned usage,
                         const struct pipe_box *box,
                         struct pipe_transfer **ptransfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_buffer *sbuf = svga_buffer(resource);
   struct pipe_transfer *transfer;
   uint8_t *map = NULL;
   int64_t begin = svga_get_time(svga);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERMAP);

   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);

   transfer = MALLOC_STRUCT(pipe_transfer);
   if (!transfer) {
      goto done;
   }

   transfer->resource = resource;
   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;
   transfer->stride = 0;
   transfer->layer_stride = 0;

   if (usage & PIPE_TRANSFER_WRITE) {
      /* If we write to the buffer for any reason, free any saved translated
       * vertices.
       */
      pipe_resource_reference(&sbuf->translated_indices.buffer, NULL);
   }

   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
      enum pipe_error ret;

      /* Host-side buffers can only be dirtied with vgpu10 features
       * (streamout and buffer copy).
       */
      assert(svga_have_vgpu10(svga));

      if (!sbuf->user) {
         (void) svga_buffer_handle(svga, resource, sbuf->bind_flags);
      }

      if (sbuf->dma.pending) {
         svga_buffer_upload_flush(svga, sbuf);
         svga_context_finish(svga);
      }

      assert(sbuf->handle);

      ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
      if (ret != PIPE_OK) {
         svga_context_flush(svga, NULL);
         ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
         assert(ret == PIPE_OK);
      }

      svga->hud.num_readbacks++;

      svga_context_finish(svga);

      sbuf->dirty = FALSE;
   }

   if (usage & PIPE_TRANSFER_WRITE) {
      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /*
          * Flush any pending primitives, finish writing any pending DMA
          * commands, and tell the host to discard the buffer contents on
          * the next DMA operation.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            /*
             * Instead of flushing the context command buffer, simply discard
             * the current hwbuf, and start a new one.
             * With GB objects, the map operation takes care of this
             * if passed the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag,
             * and the old backing store is busy.
             */

            if (!svga_have_gb_objects(svga))
               svga_buffer_destroy_hw_storage(ss, sbuf);
         }

         sbuf->map.num_ranges = 0;
         sbuf->dma.flags.discard = TRUE;
      }

      if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         if (!sbuf->map.num_ranges) {
            /*
             * No pending ranges to upload so far, so we can tell the host to
             * not synchronize on the next DMA command.
             */

            sbuf->dma.flags.unsynchronized = TRUE;
         }
      }
      else {
         /*
          * Synchronizing, so flush any pending primitives, finish writing any
          * pending DMA command, and ensure the next DMA will be done in order.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            if (svga_buffer_has_hw_storage(sbuf)) {
               /*
                * We have a pending DMA upload from a hardware buffer, therefore
                * we need to ensure that the host finishes processing that DMA
                * command before the state tracker can start overwriting the
                * hardware buffer.
                *
                * XXX: This could be avoided by tying the hardware buffer to
                * the transfer (just as done with textures), which would allow
                * overlapping DMAs commands to be queued on the same context
                * buffer. However, due to the likelihood of software vertex
                * processing, it is more convenient to hold on to the hardware
                * buffer, allowing to quickly access the contents from the CPU
                * without having to do a DMA download from the host.
                */

               if (usage & PIPE_TRANSFER_DONTBLOCK) {
                  /*
                   * Flushing the command buffer here will most likely cause
                   * the map of the hwbuf below to block, so preemptively
                   * return NULL here if DONTBLOCK is set to prevent unnecessary
                   * command buffer flushes.
                   */

                  FREE(transfer);
                  goto done;
               }

               svga_context_flush(svga, NULL);
            }
         }

         sbuf->dma.flags.unsynchronized = FALSE;
      }
   }

   if (!sbuf->swbuf && !svga_buffer_has_hw_storage(sbuf)) {
      if (svga_buffer_create_hw_storage(ss, sbuf, sbuf->bind_flags) != PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
          * buffer instead.
          */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
                         "splitting DMA transfers\n",
                         __FUNCTION__,
                         (sbuf->b.b.width0 + 1023)/1024);
         }

         sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
         if (!sbuf->swbuf) {
            FREE(transfer);
            goto done;
         }
      }
   }

   if (sbuf->swbuf) {
      /* User/malloc buffer */
      map = sbuf->swbuf;
   }
   else if (svga_buffer_has_hw_storage(sbuf)) {
      boolean retry;

      map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      if (map == NULL && retry) {
         /*
          * At this point, svga_buffer_get_transfer() has already
          * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL
          * for this buffer.
          */
         svga_context_flush(svga, NULL);
         map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      }
   }
   else {
      map = NULL;
   }

   if (map) {
      ++sbuf->map.count;
      map += transfer->box.x;
      *ptransfer = transfer;
   }
   else {
      FREE(transfer);
   }

   svga->hud.map_buffer_time += (svga_get_time(svga) - begin);

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return map;
}