static enum pipe_error
emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
{
   enum pipe_error ret;
   unsigned dirty_constbufs;
   unsigned enabled_constbufs;

   /* Emit 0th constant buffer (with extra constants) */
   ret = emit_constbuf_vgpu10(svga, shader);
   if (ret != PIPE_OK) {
      return ret;
   }

   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;

   /* Emit other constant buffers (UBOs) */
   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;

   while (dirty_constbufs) {
      unsigned index = u_bit_scan(&dirty_constbufs);
      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
      struct svga_buffer *buffer =
         svga_buffer(svga->curr.constbufs[shader][index].buffer);
      struct svga_winsys_surface *handle;

      if (buffer) {
         handle = svga_buffer_handle(svga, &buffer->b.b);
         enabled_constbufs |= 1 << index;
      }
      else {
         handle = NULL;
         enabled_constbufs &= ~(1 << index);
         assert(offset == 0);
         assert(size == 0);
      }

      assert(size % 16 == 0);
      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                                  index,
                                                  svga_shader_type(shader),
                                                  handle,
                                                  offset,
                                                  size);
      if (ret != PIPE_OK)
         return ret;
   }

   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
   svga->state.dirty_constbufs[shader] = 0;

   return ret;
}
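/*
 * Illustrative sketch (not driver code): emit_consts_vgpu10() above walks the
 * dirty-constant-buffer bitmask with u_bit_scan(), which returns the index of
 * the lowest set bit and clears it from the mask.  The stand-in below mirrors
 * that contract using only plain C, purely to make the loop structure clear;
 * the real helper lives in Mesa's util/u_math.h.
 */
static int
example_bit_scan(unsigned *mask)
{
   int i = 0;

   /* caller guarantees *mask != 0 */
   while (!(*mask & (1u << i)))
      i++;

   *mask &= ~(1u << i);   /* clear the bit so the caller's loop terminates */
   return i;
}

static void
example_walk_dirty_slots(unsigned dirty_mask)
{
   /* Visits each dirty slot exactly once, lowest index first, just like the
    * while (dirty_constbufs) loop above.
    */
   while (dirty_mask) {
      int index = example_bit_scan(&dirty_mask);
      (void) index;   /* a real caller would emit state for this slot */
   }
}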
static enum pipe_error
validate_constant_buffers(struct svga_context *svga)
{
   unsigned shader;

   assert(svga_have_vgpu10(svga));

   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
      enum pipe_error ret;
      struct svga_buffer *buffer;
      struct svga_winsys_surface *handle;
      unsigned enabled_constbufs;

      /* Rebind the default constant buffer if needed */
      if (svga->rebind.flags.constbufs) {
         buffer = svga_buffer(svga->state.hw_draw.constbuf[shader]);
         if (buffer) {
            ret = svga->swc->resource_rebind(svga->swc,
                                             buffer->handle,
                                             NULL,
                                             SVGA_RELOC_READ);
            if (ret != PIPE_OK)
               return ret;
         }
      }

      /*
       * Reference other bound constant buffers to ensure pending updates are
       * noticed by the device.
       */
      enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] & ~1u;
      while (enabled_constbufs) {
         unsigned i = u_bit_scan(&enabled_constbufs);
         buffer = svga_buffer(svga->curr.constbufs[shader][i].buffer);
         if (buffer) {
            handle = svga_buffer_handle(svga, &buffer->b.b);

            if (svga->rebind.flags.constbufs) {
               ret = svga->swc->resource_rebind(svga->swc,
                                                handle,
                                                NULL,
                                                SVGA_RELOC_READ);
               if (ret != PIPE_OK)
                  return ret;
            }
         }
      }
   }
   svga->rebind.flags.constbufs = FALSE;

   return PIPE_OK;
}
static enum pipe_error
draw_vgpu10(struct svga_hwtnl *hwtnl,
            const SVGA3dPrimitiveRange *range,
            unsigned vcount,
            unsigned min_index,
            unsigned max_index, struct pipe_resource *ib,
            unsigned start_instance, unsigned instance_count)
{
   struct svga_context *svga = hwtnl->svga;
   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
   struct svga_winsys_surface *ib_handle;
   const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
   enum pipe_error ret;
   unsigned i;

   assert(svga_have_vgpu10(svga));
   assert(hwtnl->cmd.prim_count == 0);

   /* We need to reemit all the current resource bindings along with the Draw
    * command to be sure that the referenced resources are available for the
    * Draw command, just in case the surfaces associated with the resources
    * are paged out.
    */
   if (svga->rebind.val) {
      ret = svga_rebind_framebuffer_bindings(svga);
      if (ret != PIPE_OK)
         return ret;

      ret = svga_rebind_shaders(svga);
      if (ret != PIPE_OK)
         return ret;
   }

   ret = validate_sampler_resources(svga);
   if (ret != PIPE_OK)
      return ret;

   ret = validate_constant_buffers(svga);
   if (ret != PIPE_OK)
      return ret;

   /* Get handle for each referenced vertex buffer */
   for (i = 0; i < vbuf_count; i++) {
      struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer);

      if (sbuf) {
         assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
         vb_handle[i] = svga_buffer_handle(svga, &sbuf->b.b);
         if (vb_handle[i] == NULL)
            return PIPE_ERROR_OUT_OF_MEMORY;
      }
      else {
         vb_handle[i] = NULL;
      }
   }

   /* Get handle for the index buffer */
   if (ib) {
      struct svga_buffer *sbuf = svga_buffer(ib);

      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
      (void) sbuf; /* silence unused var warning */

      ib_handle = svga_buffer_handle(svga, ib);
      if (ib_handle == NULL)
         return PIPE_ERROR_OUT_OF_MEMORY;
   }
   else {
      ib_handle = NULL;
   }

   /* setup vertex attribute input layout */
   if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
      ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
                                         hwtnl->cmd.vdecl_layout_id);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
   }

   /* setup vertex buffers */
   {
      SVGA3dVertexBuffer buffers[PIPE_MAX_ATTRIBS];

      for (i = 0; i < vbuf_count; i++) {
         buffers[i].stride = hwtnl->cmd.vbufs[i].stride;
         buffers[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
      }
      if (vbuf_count > 0) {
         ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, vbuf_count,
                                              0,    /* startBuffer */
                                              buffers, vb_handle);
         if (ret != PIPE_OK)
            return ret;
      }
   }

   /* Set primitive type (line, tri, etc) */
   if (svga->state.hw_draw.topology != range->primType) {
      ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.topology = range->primType;
   }

   if (ib_handle) {
      /* indexed drawing */
      SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);

      /* setup index buffer */
      ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
                                         indexFormat,
                                         range->indexArray.offset);
      if (ret != PIPE_OK)
         return ret;

      if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
                                                  vcount,
                                                  instance_count,
                                                  0, /* startIndexLocation */
                                                  range->indexBias,
                                                  start_instance);
         if (ret != PIPE_OK)
            return ret;
      }
      else {
         /* non-instanced drawing */
         ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
                                         vcount,
                                         0, /* startIndexLocation */
                                         range->indexBias);
         if (ret != PIPE_OK)
            return ret;
      }
   }
   else {
      /* non-indexed drawing */
      if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
                                           vcount,
                                           instance_count,
                                           range->indexBias,
                                           start_instance);
         if (ret != PIPE_OK)
            return ret;
      }
      else {
         /* non-instanced */
         ret = SVGA3D_vgpu10_Draw(svga->swc, vcount, range->indexBias);
         if (ret != PIPE_OK)
            return ret;
      }
   }

   hwtnl->cmd.prim_count = 0;

   return PIPE_OK;
}
static enum pipe_error
validate_sampler_resources(struct svga_context *svga)
{
   unsigned shader;

   assert(svga_have_vgpu10(svga));

   for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
      unsigned count = svga->curr.num_sampler_views[shader];
      unsigned i;
      struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
      enum pipe_error ret;

      /*
       * Reference bound sampler resources to ensure pending updates are
       * noticed by the device.
       */
      for (i = 0; i < count; i++) {
         struct svga_pipe_sampler_view *sv =
            svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]);

         if (sv) {
            if (sv->base.texture->target == PIPE_BUFFER) {
               surfaces[i] = svga_buffer_handle(svga, sv->base.texture);
            }
            else {
               surfaces[i] = svga_texture(sv->base.texture)->handle;
            }
         }
         else {
            surfaces[i] = NULL;
         }
      }

      if (shader == PIPE_SHADER_FRAGMENT &&
          svga->curr.rast->templ.poly_stipple_enable) {
         const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
         struct svga_pipe_sampler_view *sv =
            svga->polygon_stipple.sampler_view;

         assert(sv);
         surfaces[unit] = svga_texture(sv->base.texture)->handle;
         count = MAX2(count, unit + 1);
      }

      /* rebind the shader resources if needed */
      if (svga->rebind.flags.texture_samplers) {
         for (i = 0; i < count; i++) {
            if (surfaces[i]) {
               ret = svga->swc->resource_rebind(svga->swc,
                                                surfaces[i],
                                                NULL,
                                                SVGA_RELOC_READ);
               if (ret != PIPE_OK)
                  return ret;
            }
         }
      }
   }
   svga->rebind.flags.texture_samplers = FALSE;

   return PIPE_OK;
}
static enum pipe_error
draw_vgpu9(struct svga_hwtnl *hwtnl)
{
   struct svga_winsys_context *swc = hwtnl->cmd.swc;
   struct svga_context *svga = hwtnl->svga;
   enum pipe_error ret;
   struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
   struct svga_winsys_surface *ib_handle[QSZ];
   struct svga_winsys_surface *handle;
   SVGA3dVertexDecl *vdecl;
   SVGA3dPrimitiveRange *prim;
   unsigned i;

   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
      unsigned j = hwtnl->cmd.vdecl_buffer_index[i];
      handle = svga_buffer_handle(svga, hwtnl->cmd.vbufs[j].buffer);
      if (handle == NULL)
         return PIPE_ERROR_OUT_OF_MEMORY;

      vb_handle[i] = handle;
   }

   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
      if (hwtnl->cmd.prim_ib[i]) {
         handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
         if (handle == NULL)
            return PIPE_ERROR_OUT_OF_MEMORY;
      }
      else
         handle = NULL;

      ib_handle[i] = handle;
   }

   if (svga->rebind.flags.rendertargets) {
      ret = svga_reemit_framebuffer_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   if (svga->rebind.flags.texture_samplers) {
      ret = svga_reemit_tss_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   if (svga->rebind.flags.vs) {
      ret = svga_reemit_vs_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   if (svga->rebind.flags.fs) {
      ret = svga_reemit_fs_bindings(svga);
      if (ret != PIPE_OK) {
         return ret;
      }
   }

   SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
            svga->curr.framebuffer.cbufs[0] ?
            svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
            hwtnl->cmd.prim_count);

   ret = SVGA3D_BeginDrawPrimitives(swc,
                                    &vdecl,
                                    hwtnl->cmd.vdecl_count,
                                    &prim, hwtnl->cmd.prim_count);
   if (ret != PIPE_OK)
      return ret;

   memcpy(vdecl,
          hwtnl->cmd.vdecl,
          hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);

   for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
      /* check for 4-byte alignment */
      assert(vdecl[i].array.offset % 4 == 0);
      assert(vdecl[i].array.stride % 4 == 0);

      /* Given rangeHint is considered to be relative to indexBias, and
       * indexBias varies per primitive, we cannot accurately supply a
       * rangeHint when emitting more than one primitive per draw command.
       */
      if (hwtnl->cmd.prim_count == 1) {
         vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
         vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
      }
      else {
         vdecl[i].rangeHint.first = 0;
         vdecl[i].rangeHint.last = 0;
      }

      swc->surface_relocation(swc,
                              &vdecl[i].array.surfaceId,
                              NULL, vb_handle[i], SVGA_RELOC_READ);
   }

   memcpy(prim,
          hwtnl->cmd.prim,
          hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);

   for (i = 0; i < hwtnl->cmd.prim_count; i++) {
      swc->surface_relocation(swc,
                              &prim[i].indexArray.surfaceId,
                              NULL, ib_handle[i], SVGA_RELOC_READ);
      pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
   }

   SVGA_FIFOCommitAll(swc);

   hwtnl->cmd.prim_count = 0;

   return PIPE_OK;
}
/**
 * Create a buffer transfer.
 *
 * Unlike texture DMAs (which are written immediately to the command buffer
 * and therefore inherently serialized with other context operations), for
 * buffers we try to coalesce multiple range mappings (i.e., multiple calls to
 * this function) into a single DMA command, for better efficiency in command
 * processing.  This means we need to exercise extra care here to ensure that
 * the end result is exactly the same as if one DMA was used for every mapped
 * range.
 */
static void *
svga_buffer_transfer_map(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level,
                         unsigned usage,
                         const struct pipe_box *box,
                         struct pipe_transfer **ptransfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_buffer *sbuf = svga_buffer(resource);
   struct pipe_transfer *transfer;
   uint8_t *map = NULL;
   int64_t begin = svga_get_time(svga);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERMAP);

   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);

   transfer = MALLOC_STRUCT(pipe_transfer);
   if (!transfer) {
      goto done;
   }

   transfer->resource = resource;
   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;
   transfer->stride = 0;
   transfer->layer_stride = 0;

   if (usage & PIPE_TRANSFER_WRITE) {
      /* If we write to the buffer for any reason, free any saved translated
       * vertices.
       */
      pipe_resource_reference(&sbuf->translated_indices.buffer, NULL);
   }

   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
      enum pipe_error ret;

      /* Host-side buffers can only be dirtied with vgpu10 features
       * (streamout and buffer copy).
       */
      assert(svga_have_vgpu10(svga));

      if (!sbuf->user) {
         (void) svga_buffer_handle(svga, resource, sbuf->bind_flags);
      }

      if (sbuf->dma.pending) {
         svga_buffer_upload_flush(svga, sbuf);
         svga_context_finish(svga);
      }

      assert(sbuf->handle);

      ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
      if (ret != PIPE_OK) {
         svga_context_flush(svga, NULL);
         ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
         assert(ret == PIPE_OK);
      }

      svga->hud.num_readbacks++;

      svga_context_finish(svga);

      sbuf->dirty = FALSE;
   }

   if (usage & PIPE_TRANSFER_WRITE) {
      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /*
          * Flush any pending primitives, finish writing any pending DMA
          * commands, and tell the host to discard the buffer contents on
          * the next DMA operation.
          */
         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            /*
             * Instead of flushing the context command buffer, simply discard
             * the current hwbuf, and start a new one.
             * With GB objects, the map operation takes care of this
             * if passed the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag,
             * and the old backing store is busy.
             */
            if (!svga_have_gb_objects(svga))
               svga_buffer_destroy_hw_storage(ss, sbuf);
         }

         sbuf->map.num_ranges = 0;
         sbuf->dma.flags.discard = TRUE;
      }

      if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         if (!sbuf->map.num_ranges) {
            /*
             * No pending ranges to upload so far, so we can tell the host to
             * not synchronize on the next DMA command.
             */
            sbuf->dma.flags.unsynchronized = TRUE;
         }
      }
      else {
         /*
          * Synchronizing, so flush any pending primitives, finish writing any
          * pending DMA command, and ensure the next DMA will be done in
          * order.
          */
         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            if (svga_buffer_has_hw_storage(sbuf)) {
               /*
                * We have a pending DMA upload from a hardware buffer,
                * therefore we need to ensure that the host finishes
                * processing that DMA command before the state tracker can
                * start overwriting the hardware buffer.
                *
                * XXX: This could be avoided by tying the hardware buffer to
                * the transfer (just as done with textures), which would
                * allow overlapping DMA commands to be queued on the same
                * context buffer.  However, due to the likelihood of software
                * vertex processing, it is more convenient to hold on to the
                * hardware buffer, allowing the contents to be accessed
                * quickly from the CPU without having to do a DMA download
                * from the host.
                */
               if (usage & PIPE_TRANSFER_DONTBLOCK) {
                  /*
                   * Flushing the command buffer here will most likely cause
                   * the map of the hwbuf below to block, so preemptively
                   * return NULL here if DONTBLOCK is set to prevent
                   * unnecessary command buffer flushes.
                   */
                  FREE(transfer);
                  goto done;
               }

               svga_context_flush(svga, NULL);
            }
         }

         sbuf->dma.flags.unsynchronized = FALSE;
      }
   }

   if (!sbuf->swbuf && !svga_buffer_has_hw_storage(sbuf)) {
      if (svga_buffer_create_hw_storage(ss, sbuf, sbuf->bind_flags) != PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
          * buffer instead.
          */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
                         "splitting DMA transfers\n",
                         __FUNCTION__,
                         (sbuf->b.b.width0 + 1023) / 1024);
         }

         sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
         if (!sbuf->swbuf) {
            FREE(transfer);
            goto done;
         }
      }
   }

   if (sbuf->swbuf) {
      /* User/malloc buffer */
      map = sbuf->swbuf;
   }
   else if (svga_buffer_has_hw_storage(sbuf)) {
      boolean retry;

      map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      if (map == NULL && retry) {
         /*
          * At this point, svga_buffer_get_transfer() has already
          * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL
          * for this buffer.
          */
         svga_context_flush(svga, NULL);
         map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      }
   }
   else {
      map = NULL;
   }

   if (map) {
      ++sbuf->map.count;
      map += transfer->box.x;
      *ptransfer = transfer;
   }
   else {
      FREE(transfer);
   }

   svga->hud.map_buffer_time += (svga_get_time(svga) - begin);

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return map;
}
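/*
 * Illustrative caller-side sketch (an assumption, not part of this driver):
 * a state tracker typically reaches svga_buffer_transfer_map() through the
 * gallium helpers also used elsewhere in this code (pipe_buffer_map_range()
 * and pipe_buffer_unmap() from util/u_inlines.h).  Mapping with
 * PIPE_TRANSFER_WRITE plus a discard flag is what lets the driver take the
 * unsynchronized/coalescing paths described above.
 */
static void
example_update_buffer_range(struct pipe_context *pipe,
                            struct pipe_resource *buf,
                            unsigned offset,
                            const void *data, unsigned size)
{
   struct pipe_transfer *transfer;
   void *map;

   /* DISCARD_RANGE tells the driver the previous contents of the mapped
    * range are not needed, so no readback or stall is required.
    */
   map = pipe_buffer_map_range(pipe, buf, offset, size,
                               PIPE_TRANSFER_WRITE |
                               PIPE_TRANSFER_DISCARD_RANGE,
                               &transfer);
   if (!map)
      return;

   memcpy(map, data, size);
   pipe_buffer_unmap(pipe, transfer);
}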
static void
svga_set_stream_output_targets(struct pipe_context *pipe,
                               unsigned num_targets,
                               struct pipe_stream_output_target **targets,
                               const unsigned *offsets)
{
   struct svga_context *svga = svga_context(pipe);
   struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
   enum pipe_error ret;
   unsigned i;
   unsigned num_so_targets;

   SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,
            num_targets);

   assert(svga_have_vgpu10(svga));

   /* Mark the streamout buffers as dirty so that we'll issue readbacks
    * before mapping.
    */
   for (i = 0; i < svga->num_so_targets; i++) {
      struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);
      sbuf->dirty = TRUE;
   }

   assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);

   for (i = 0; i < num_targets; i++) {
      struct svga_stream_output_target *sot =
         svga_stream_output_target(targets[i]);
      struct svga_buffer *sbuf = svga_buffer(sot->base.buffer);
      unsigned size;

      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
      (void) sbuf;

      svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer);
      svga->so_targets[i] = &sot->base;
      soBindings[i].offset = sot->base.buffer_offset;

      /* The size cannot extend beyond the end of the buffer.  Clamp it. */
      size = MIN2(sot->base.buffer_size,
                  sot->base.buffer->width0 - sot->base.buffer_offset);

      soBindings[i].sizeInBytes = size;
   }

   /* unbind any previously bound stream output buffers */
   for (; i < svga->num_so_targets; i++) {
      svga->so_surfaces[i] = NULL;
      svga->so_targets[i] = NULL;
   }

   num_so_targets = MAX2(svga->num_so_targets, num_targets);
   ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
                                    soBindings, svga->so_surfaces);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
                                       soBindings, svga->so_surfaces);
   }

   svga->num_so_targets = num_targets;
}
static enum pipe_error
emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
{
   enum pipe_error ret;
   unsigned dirty_constbufs;
   unsigned enabled_constbufs;

   /* Emit 0th constant buffer (with extra constants) */
   ret = emit_constbuf_vgpu10(svga, shader);
   if (ret != PIPE_OK) {
      return ret;
   }

   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;

   /* Emit other constant buffers (UBOs) */
   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;

   while (dirty_constbufs) {
      unsigned index = u_bit_scan(&dirty_constbufs);
      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
      struct svga_buffer *buffer =
         svga_buffer(svga->curr.constbufs[shader][index].buffer);
      struct svga_winsys_surface *handle;

      if (buffer) {
         handle = svga_buffer_handle(svga, &buffer->b.b);
         enabled_constbufs |= 1 << index;
      }
      else {
         handle = NULL;
         enabled_constbufs &= ~(1 << index);
         assert(offset == 0);
         assert(size == 0);
      }

      if (size % 16 != 0) {
         /* GL's buffer range sizes can be any number of bytes but the
          * SVGA3D device requires a multiple of 16 bytes.
          */
         const unsigned total_size = buffer->b.b.width0;

         if (offset + align(size, 16) <= total_size) {
            /* round up size to multiple of 16 */
            size = align(size, 16);
         }
         else {
            /* round down to multiple of 16 (this may cause rendering
             * problems but should avoid a device error).
             */
            size &= ~15;
         }
      }

      assert(size % 16 == 0);
      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                                  index,
                                                  svga_shader_type(shader),
                                                  handle,
                                                  offset,
                                                  size);
      if (ret != PIPE_OK)
         return ret;

      svga->hud.num_const_buf_updates++;
   }

   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
   svga->state.dirty_constbufs[shader] = 0;

   return ret;
}
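/*
 * Illustrative sketch (not driver code): the constant-buffer size fixup in
 * the loop above, isolated as a pure function.  It assumes align(x, 16)
 * rounds up to the next multiple of 16, as Mesa's align() does.  For example,
 * size=20, offset=0, total=64 yields 32 (rounding up still fits), while
 * size=20, offset=48, total=64 yields 16 (rounding up would overrun the
 * buffer).
 */
static unsigned
example_fix_constbuf_size(unsigned offset, unsigned size, unsigned total_size)
{
   if (size % 16 != 0) {
      if (offset + align(size, 16) <= total_size)
         size = align(size, 16);   /* round up: still within the buffer */
      else
         size &= ~15u;             /* round down: avoid a device error */
   }
   return size;
}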
static enum pipe_error
emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
{
   const struct pipe_constant_buffer *cbuf;
   struct pipe_resource *dst_buffer = NULL;
   enum pipe_error ret = PIPE_OK;
   struct pipe_transfer *src_transfer;
   struct svga_winsys_surface *dst_handle;
   float extras[MAX_EXTRA_CONSTS][4];
   unsigned extra_count, extra_size, extra_offset;
   unsigned new_buf_size;
   void *src_map = NULL, *dst_map;
   unsigned offset;
   const struct svga_shader_variant *variant;
   unsigned alloc_buf_size;

   assert(shader == PIPE_SHADER_VERTEX ||
          shader == PIPE_SHADER_GEOMETRY ||
          shader == PIPE_SHADER_FRAGMENT);

   cbuf = &svga->curr.constbufs[shader][0];

   switch (shader) {
   case PIPE_SHADER_VERTEX:
      variant = svga->state.hw_draw.vs;
      extra_count = svga_get_extra_vs_constants(svga, (float *) extras);
      break;
   case PIPE_SHADER_FRAGMENT:
      variant = svga->state.hw_draw.fs;
      extra_count = svga_get_extra_fs_constants(svga, (float *) extras);
      break;
   case PIPE_SHADER_GEOMETRY:
      variant = svga->state.hw_draw.gs;
      extra_count = svga_get_extra_gs_constants(svga, (float *) extras);
      break;
   default:
      assert(!"Unexpected shader type");
      /* Don't return an error code since we don't want to keep re-trying
       * this function and getting stuck in an infinite loop.
       */
      return PIPE_OK;
   }

   assert(variant);

   /* Compute extra constants size and offset in bytes */
   extra_size = extra_count * 4 * sizeof(float);
   extra_offset = 4 * sizeof(float) * variant->extra_const_start;

   if (cbuf->buffer_size + extra_size == 0)
      return PIPE_OK;  /* nothing to do */

   /* Typically, the cbuf->buffer here is a user-space buffer so mapping
    * it is really cheap.  If we ever get real HW buffers for constants
    * we should avoid mapping and instead use a ResourceCopy command.
    */
   if (cbuf->buffer_size > 0) {
      src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer,
                                      cbuf->buffer_offset, cbuf->buffer_size,
                                      PIPE_TRANSFER_READ, &src_transfer);
      assert(src_map);
      if (!src_map) {
         return PIPE_ERROR_OUT_OF_MEMORY;
      }
   }

   /* The new/dest buffer's size must be large enough to hold the original,
    * user-specified constants, plus the extra constants.
    * The size of the original constant buffer _should_ agree with what the
    * shader is expecting, but it might not (it's not enforced anywhere by
    * gallium).
    */
   new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size;

   /* According to the DX10 spec, the constant buffer size must be
    * in multiples of 16.
    */
   new_buf_size = align(new_buf_size, 16);

   /* Constant buffer size in the upload buffer must be in multiples of 256.
    * In order to maximize the chance of merging the upload buffer chunks
    * when svga_buffer_add_range() is called, the allocated buffer size needs
    * to be in multiples of 256 as well.  Otherwise, since there is a gap
    * between each dirty range of the upload buffer, each dirty range will
    * end up in its own UPDATE_GB_IMAGE command.
    */
   alloc_buf_size = align(new_buf_size, CONST0_UPLOAD_ALIGNMENT);

   u_upload_alloc(svga->const0_upload, 0, alloc_buf_size,
                  CONST0_UPLOAD_ALIGNMENT, &offset,
                  &dst_buffer, &dst_map);
   if (!dst_map) {
      if (src_map)
         pipe_buffer_unmap(&svga->pipe, src_transfer);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   if (src_map) {
      memcpy(dst_map, src_map, cbuf->buffer_size);
      pipe_buffer_unmap(&svga->pipe, src_transfer);
   }

   if (extra_size) {
      assert(extra_offset + extra_size <= new_buf_size);
      memcpy((char *) dst_map + extra_offset, extras, extra_size);
   }
   u_upload_unmap(svga->const0_upload);

   /* Issue the SetSingleConstantBuffer command */
   dst_handle = svga_buffer_handle(svga, dst_buffer);
   if (!dst_handle) {
      pipe_resource_reference(&dst_buffer, NULL);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   assert(new_buf_size % 16 == 0);
   ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                               0, /* index */
                                               svga_shader_type(shader),
                                               dst_handle,
                                               offset,
                                               new_buf_size);
   if (ret != PIPE_OK) {
      pipe_resource_reference(&dst_buffer, NULL);
      return ret;
   }

   /* Save this const buffer until it's replaced in the future.
    * Otherwise, all references to the buffer will go away after the
    * command buffer is submitted, it'll get recycled and we will have
    * incorrect constant buffer bindings.
    */
   pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer);

   svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size;

   pipe_resource_reference(&dst_buffer, NULL);

   svga->hud.num_const_buf_updates++;

   return ret;
}
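/*
 * Illustrative worked example with assumed numbers (not driver code): sizing
 * of the combined constant buffer built above.  Suppose the application bound
 * 180 bytes of constants, the shader variant starts its extra constants at
 * register 12 (extra_offset = 12 * 16 = 192 bytes) and needs two extra vec4s
 * (extra_size = 32 bytes), and CONST0_UPLOAD_ALIGNMENT is 256 as the comment
 * above implies.
 */
static void
example_const0_sizing(void)
{
   const unsigned buffer_size = 180;                       /* user constants */
   const unsigned extra_offset = 12 * 4 * sizeof(float);   /* 192 bytes      */
   const unsigned extra_size = 2 * 4 * sizeof(float);      /* 32 bytes       */
   unsigned new_buf_size;
   unsigned alloc_buf_size;

   /* big enough for both the user constants and the extra constants */
   new_buf_size = MAX2(buffer_size, extra_offset) + extra_size;   /* 224 */

   /* DX10 requires a multiple of 16 (224 is already aligned) */
   new_buf_size = align(new_buf_size, 16);

   /* upload-buffer chunks are kept 256-byte aligned so dirty ranges merge */
   alloc_buf_size = align(new_buf_size, 256);                     /* 256 */

   (void) alloc_buf_size;
}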
static enum pipe_error
draw_vgpu10(struct svga_hwtnl *hwtnl,
            const SVGA3dPrimitiveRange *range,
            unsigned vcount,
            unsigned min_index,
            unsigned max_index, struct pipe_resource *ib,
            unsigned start_instance, unsigned instance_count)
{
   struct svga_context *svga = hwtnl->svga;
   struct pipe_resource *vbuffers[SVGA3D_INPUTREG_MAX];
   struct svga_winsys_surface *vbuffer_handles[SVGA3D_INPUTREG_MAX];
   struct svga_winsys_surface *ib_handle;
   const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
   int last_vbuf = -1;
   enum pipe_error ret;
   unsigned i;

   assert(svga_have_vgpu10(svga));
   assert(hwtnl->cmd.prim_count == 0);

   /* We need to reemit all the current resource bindings along with the Draw
    * command to be sure that the referenced resources are available for the
    * Draw command, just in case the surfaces associated with the resources
    * are paged out.
    */
   if (svga->rebind.val) {
      ret = svga_rebind_framebuffer_bindings(svga);
      if (ret != PIPE_OK)
         return ret;

      ret = svga_rebind_shaders(svga);
      if (ret != PIPE_OK)
         return ret;

      /* Rebind stream output targets */
      ret = svga_rebind_stream_output_targets(svga);
      if (ret != PIPE_OK)
         return ret;

      /* No need to explicitly rebind index buffer and vertex buffers here.
       * Even if the same index buffer or vertex buffers are referenced for
       * this draw and we skip emitting the redundant set command, we will
       * still reference the associated resources.
       */
   }

   ret = validate_sampler_resources(svga);
   if (ret != PIPE_OK)
      return ret;

   ret = validate_constant_buffers(svga);
   if (ret != PIPE_OK)
      return ret;

   /* Get handle for each referenced vertex buffer */
   for (i = 0; i < vbuf_count; i++) {
      struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer);

      if (sbuf) {
         assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
         vbuffer_handles[i] = svga_buffer_handle(svga, &sbuf->b.b);
         if (vbuffer_handles[i] == NULL)
            return PIPE_ERROR_OUT_OF_MEMORY;
         vbuffers[i] = &sbuf->b.b;
         last_vbuf = i;
      }
      else {
         vbuffers[i] = NULL;
         vbuffer_handles[i] = NULL;
      }
   }

   for (; i < svga->state.hw_draw.num_vbuffers; i++) {
      vbuffers[i] = NULL;
      vbuffer_handles[i] = NULL;
   }

   /* Get handle for the index buffer */
   if (ib) {
      struct svga_buffer *sbuf = svga_buffer(ib);

      assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
      (void) sbuf; /* silence unused var warning */

      ib_handle = svga_buffer_handle(svga, ib);
      if (!ib_handle)
         return PIPE_ERROR_OUT_OF_MEMORY;
   }
   else {
      ib_handle = NULL;
   }

   /* setup vertex attribute input layout */
   if (svga->state.hw_draw.layout_id != hwtnl->cmd.vdecl_layout_id) {
      ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
                                         hwtnl->cmd.vdecl_layout_id);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
   }

   /* setup vertex buffers */
   {
      SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS];

      for (i = 0; i < vbuf_count; i++) {
         vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride;
         vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
         vbuffer_attrs[i].sid = 0;
      }

      /* If we haven't yet emitted a drawing command or if any
       * vertex buffer state is changing, issue that state now.
       */
      if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) == 0) ||
          vbuf_count != svga->state.hw_draw.num_vbuffers ||
          memcmp(vbuffer_attrs, svga->state.hw_draw.vbuffer_attrs,
                 vbuf_count * sizeof(vbuffer_attrs[0])) ||
          memcmp(vbuffers, svga->state.hw_draw.vbuffers,
                 vbuf_count * sizeof(vbuffers[0]))) {
         unsigned num_vbuffers;

         /* get the max of the current bound vertex buffers count and
          * the to-be-bound vertex buffers count, so as to unbind
          * the unused vertex buffers.
          */
         num_vbuffers = MAX2(vbuf_count, svga->state.hw_draw.num_vbuffers);

         if (num_vbuffers > 0) {
            ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, num_vbuffers,
                                                 0,    /* startBuffer */
                                                 vbuffer_attrs,
                                                 vbuffer_handles);
            if (ret != PIPE_OK)
               return ret;

            /* save the number of vertex buffers sent to the device, not
             * including trailing unbound vertex buffers.
             */
            svga->state.hw_draw.num_vbuffers = last_vbuf + 1;
            memcpy(svga->state.hw_draw.vbuffer_attrs, vbuffer_attrs,
                   num_vbuffers * sizeof(vbuffer_attrs[0]));
            for (i = 0; i < num_vbuffers; i++) {
               pipe_resource_reference(&svga->state.hw_draw.vbuffers[i],
                                       vbuffers[i]);
            }
         }
      }
      else {
         /* Even though we can avoid emitting the redundant SetVertexBuffers
          * command, we still need to reference the vertex buffer surfaces.
          */
         for (i = 0; i < vbuf_count; i++) {
            if (vbuffer_handles[i] && !last_command_was_draw(svga)) {
               ret = svga->swc->resource_rebind(svga->swc, vbuffer_handles[i],
                                                NULL, SVGA_RELOC_READ);
               if (ret != PIPE_OK)
                  return ret;
            }
         }
      }
   }

   /* Set primitive type (line, tri, etc) */
   if (svga->state.hw_draw.topology != range->primType) {
      ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
      if (ret != PIPE_OK)
         return ret;

      svga->state.hw_draw.topology = range->primType;
   }

   if (ib_handle) {
      /* indexed drawing */
      SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);

      /* setup index buffer */
      if (ib != svga->state.hw_draw.ib ||
          indexFormat != svga->state.hw_draw.ib_format ||
          range->indexArray.offset != svga->state.hw_draw.ib_offset) {

         assert(indexFormat != SVGA3D_FORMAT_INVALID);
         ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
                                            indexFormat,
                                            range->indexArray.offset);
         if (ret != PIPE_OK)
            return ret;

         pipe_resource_reference(&svga->state.hw_draw.ib, ib);
         svga->state.hw_draw.ib_format = indexFormat;
         svga->state.hw_draw.ib_offset = range->indexArray.offset;
      }
      else {
         /* Even though we can avoid emitting the redundant SetIndexBuffer
          * command, we still need to reference the index buffer surface.
          */
         if (!last_command_was_draw(svga)) {
            ret = svga->swc->resource_rebind(svga->swc, ib_handle,
                                             NULL, SVGA_RELOC_READ);
            if (ret != PIPE_OK)
               return ret;
         }
      }

      if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
                                                  vcount,
                                                  instance_count,
                                                  0, /* startIndexLocation */
                                                  range->indexBias,
                                                  start_instance);
         if (ret != PIPE_OK)
            return ret;
      }
      else {
         /* non-instanced drawing */
         ret = SVGA3D_vgpu10_DrawIndexed(svga->swc,
                                         vcount,
                                         0, /* startIndexLocation */
                                         range->indexBias);
         if (ret != PIPE_OK)
            return ret;
      }
   }
   else {
      /* non-indexed drawing */
      if (svga->state.hw_draw.ib_format != SVGA3D_FORMAT_INVALID ||
          svga->state.hw_draw.ib != NULL) {
         /* Unbind previously bound index buffer */
         ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, NULL,
                                            SVGA3D_FORMAT_INVALID, 0);
         if (ret != PIPE_OK)
            return ret;
         pipe_resource_reference(&svga->state.hw_draw.ib, NULL);
         svga->state.hw_draw.ib_format = SVGA3D_FORMAT_INVALID;
      }

      assert(svga->state.hw_draw.ib == NULL);

      if (instance_count > 1) {
         ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
                                           vcount,
                                           instance_count,
                                           range->indexBias,
                                           start_instance);
         if (ret != PIPE_OK)
            return ret;
      }
      else {
         /* non-instanced */
         ret = SVGA3D_vgpu10_Draw(svga->swc, vcount, range->indexBias);
         if (ret != PIPE_OK)
            return ret;
      }
   }

   hwtnl->cmd.prim_count = 0;

   return PIPE_OK;
}
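/*
 * Illustrative sketch (not driver code) of the redundant-state-elision
 * pattern the vertex- and index-buffer setup above follows: remember what was
 * last sent to the device, emit a Set* command only when something actually
 * changed, and otherwise just re-reference the resource so it stays resident.
 * The emit/rebind callbacks are hypothetical stand-ins for the
 * SVGA3D_vgpu10_Set* and resource_rebind calls used above.
 */
struct example_cached_binding {
   const void *resource;   /* last resource sent to the device */
   unsigned offset;        /* last offset sent to the device   */
};

static int
example_bind_if_changed(struct example_cached_binding *cache,
                        const void *resource, unsigned offset,
                        int (*emit)(const void *res, unsigned off),
                        int (*rebind)(const void *res))
{
   if (resource != cache->resource || offset != cache->offset) {
      /* state changed: emit the binding command and update the cache */
      int ret = emit(resource, offset);
      if (ret != 0)
         return ret;
      cache->resource = resource;
      cache->offset = offset;
      return 0;
   }

   /* unchanged: skip the redundant command but keep the resource referenced */
   return resource ? rebind(resource) : 0;
}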
enum pipe_error
svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
{
   struct svga_winsys_context *swc = hwtnl->cmd.swc;
   struct svga_context *svga = hwtnl->svga;
   enum pipe_error ret;

   if (hwtnl->cmd.prim_count) {
      struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
      struct svga_winsys_surface *ib_handle[QSZ];
      struct svga_winsys_surface *handle;
      SVGA3dVertexDecl *vdecl;
      SVGA3dPrimitiveRange *prim;
      unsigned i;

      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
         handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
         if (handle == NULL)
            return PIPE_ERROR_OUT_OF_MEMORY;

         vb_handle[i] = handle;
      }

      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
         if (hwtnl->cmd.prim_ib[i]) {
            handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
            if (handle == NULL)
               return PIPE_ERROR_OUT_OF_MEMORY;
         }
         else
            handle = NULL;

         ib_handle[i] = handle;
      }

      SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
               svga->curr.framebuffer.cbufs[0] ?
               svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
               hwtnl->cmd.prim_count);

      ret = SVGA3D_BeginDrawPrimitives(swc,
                                       &vdecl,
                                       hwtnl->cmd.vdecl_count,
                                       &prim,
                                       hwtnl->cmd.prim_count);
      if (ret != PIPE_OK)
         return ret;

      memcpy(vdecl,
             hwtnl->cmd.vdecl,
             hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);

      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
         /* Given rangeHint is considered to be relative to indexBias, and
          * indexBias varies per primitive, we cannot accurately supply a
          * rangeHint when emitting more than one primitive per draw command.
          */
         if (hwtnl->cmd.prim_count == 1) {
            vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
            vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
         }
         else {
            vdecl[i].rangeHint.first = 0;
            vdecl[i].rangeHint.last = 0;
         }

         swc->surface_relocation(swc,
                                 &vdecl[i].array.surfaceId,
                                 vb_handle[i], PIPE_BUFFER_USAGE_GPU_READ);
      }

      memcpy(prim,
             hwtnl->cmd.prim,
             hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);

      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
         swc->surface_relocation(swc,
                                 &prim[i].indexArray.surfaceId,
                                 ib_handle[i], PIPE_BUFFER_USAGE_GPU_READ);
         pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL);
      }

      SVGA_FIFOCommitAll(swc);

      hwtnl->cmd.prim_count = 0;
   }

   return PIPE_OK;
}