/** * svga_reemit_gs_bindings - Reemit the geometry shader bindings */ enum pipe_error svga_reemit_gs_bindings(struct svga_context *svga) { enum pipe_error ret; struct svga_winsys_gb_shader *gbshader = NULL; SVGA3dShaderId shaderId = SVGA3D_INVALID_ID; assert(svga->rebind.flags.gs); assert(svga_have_gb_objects(svga)); /* Geometry Shader is only supported in vgpu10 */ assert(svga_have_vgpu10(svga)); if (svga->state.hw_draw.gs) { gbshader = svga->state.hw_draw.gs->gb_shader; shaderId = svga->state.hw_draw.gs->id; } if (!svga_need_to_rebind_resources(svga)) { ret = svga->swc->resource_rebind(svga->swc, NULL, gbshader, SVGA_RELOC_READ); goto out; } ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_GS, gbshader, shaderId); out: if (ret != PIPE_OK) return ret; svga->rebind.flags.gs = FALSE; return PIPE_OK; }
/** * svga_reemit_fs_bindings - Reemit the fragment shader bindings */ enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga) { enum pipe_error ret; assert(svga->rebind.flags.fs); assert(svga_have_gb_objects(svga)); if (!svga->state.hw_draw.fs) return PIPE_OK; if (!svga_need_to_rebind_resources(svga)) { ret = svga->swc->resource_rebind(svga->swc, NULL, svga->state.hw_draw.fs->gb_shader, SVGA_RELOC_READ); goto out; } if (svga_have_vgpu10(svga)) ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, svga->state.hw_draw.fs->gb_shader, svga->state.hw_draw.fs->id); else ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, svga->state.hw_draw.fs->gb_shader); out: if (ret != PIPE_OK) return ret; svga->rebind.flags.fs = FALSE; return PIPE_OK; }
/**
 * Emit the current fragment shader to the hardware, compiling a new
 * variant first if no variant matching the current compile key exists.
 *
 * \param svga   the context
 * \param dirty  dirty-state bitmask (part of the emit callback signature;
 *               the relevant flags are listed in the comment below)
 * \return PIPE_OK on success, or the error from key creation, shader
 *         compilation, or command emission.
 */
static enum pipe_error
emit_hw_fs(struct svga_context *svga, unsigned dirty)
{
   struct svga_shader_variant *variant = NULL;
   enum pipe_error ret = PIPE_OK;
   struct svga_fragment_shader *fs = svga->curr.fs;
   struct svga_fs_compile_key key;

   /* SVGA_NEW_BLEND
    * SVGA_NEW_TEXTURE_BINDING
    * SVGA_NEW_RAST
    * SVGA_NEW_NEED_SWTNL
    * SVGA_NEW_SAMPLER
    * SVGA_NEW_FRAME_BUFFER
    */
   ret = make_fs_key( svga, fs, &key );
   if (ret != PIPE_OK)
      return ret;

   /* Look for a previously-compiled variant matching this key. */
   variant = search_fs_key( fs, &key );
   if (!variant) {
      ret = compile_fs( svga, fs, &key, &variant );
      if (ret != PIPE_OK)
         return ret;
   }

   assert(variant);

   /* Only emit commands when the bound variant actually changes. */
   if (variant != svga->state.hw_draw.fs) {
      if (svga_have_gb_objects(svga)) {
         /*
          * Bind is necessary here only because pipebuffer_fenced may move
          * the shader contents around....
          */
         ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
         if (ret != PIPE_OK)
            return ret;

         ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS,
                                  variant->gb_shader);
         if (ret != PIPE_OK)
            return ret;

         /* The shader was just (re)bound; no rebind needed until the next
          * command buffer flush.
          */
         svga->rebind.fs = FALSE;
      }
      else {
         ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS,
                                variant->id);
         if (ret != PIPE_OK)
            return ret;
      }

      svga->dirty |= SVGA_NEW_FS_VARIANT;
      svga->state.hw_draw.fs = variant;
   }

   return PIPE_OK;
}
/**
 * Issue commands to transfer guest memory to the host.
 */
static enum pipe_error
svga_buffer_upload_command(struct svga_context *svga, struct svga_buffer *sbuf)
{
   /* Dispatch to the guest-backed or host-backed upload path. */
   return svga_have_gb_objects(svga)
      ? svga_buffer_upload_gb_command(svga, sbuf)
      : svga_buffer_upload_hb_command(svga, sbuf);
}
/**
 * unmap direct map transfer request
 *
 * Unmaps the surface and, for write transfers, issues UPDATE_GB_IMAGE
 * commands so the host picks up the modified region.  For array textures
 * one update is issued per layer.
 */
static void
svga_texture_transfer_unmap_direct(struct svga_context *svga,
                                   struct svga_transfer *st)
{
   struct pipe_transfer *transfer = &st->base;
   struct svga_texture *tex = svga_texture(transfer->resource);

   svga_texture_surface_unmap(svga, transfer);

   /* Now send an update command to update the content in the backend. */
   if (st->base.usage & PIPE_TRANSFER_WRITE) {
      struct svga_winsys_surface *surf = tex->handle;
      enum pipe_error ret;

      /* Direct maps only exist with guest-backed objects. */
      assert(svga_have_gb_objects(svga));

      /* update the affected region */
      SVGA3dBox box = st->box;
      unsigned nlayers;

      switch (tex->b.b.target) {
      case PIPE_TEXTURE_2D_ARRAY:
      case PIPE_TEXTURE_CUBE_ARRAY:
      case PIPE_TEXTURE_1D_ARRAY:
         /* For array textures the box depth counts layers; each layer is
          * updated with its own depth-1 box below.
          */
         nlayers = box.d;
         box.d = 1;
         break;
      default:
         nlayers = 1;
         break;
      }

      if (0)
         debug_printf("%s %d, %d, %d %d x %d x %d\n",
                      __FUNCTION__,
                      box.x, box.y, box.z,
                      box.w, box.h, box.d);

      if (svga_have_vgpu10(svga)) {
         unsigned i;
         /* One update command per array layer. */
         for (i = 0; i < nlayers; i++) {
            ret = update_image_vgpu10(svga, surf, &box,
                                      st->slice + i, transfer->level,
                                      tex->b.b.last_level + 1);
            assert(ret == PIPE_OK);
         }
      } else {
         /* vgpu9 has no array textures, so there is exactly one layer. */
         assert(nlayers == 1);
         ret = update_image_vgpu9(svga, surf, &box, st->slice,
                                  transfer->level);
         assert(ret == PIPE_OK);
      }
      (void) ret;
   }
}
/**
 * Flush the SVGA command buffer to the host.
 *
 * Pending buffer DMA uploads are flushed first so they precede the
 * commands that reference them, then the winsys command buffer itself is
 * flushed.  Afterwards all rebind flags are raised so that bindings are
 * reemitted into the next command buffer.
 *
 * \param svga    the context
 * \param pfence  if non-NULL, receives a reference to the flush fence
 */
void
svga_context_flush( struct svga_context *svga,
                    struct pipe_fence_handle **pfence )
{
   struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
   struct pipe_fence_handle *fence = NULL;
   uint64_t t0;

   svga->curr.nr_fbs = 0;

   /* Ensure that texture dma uploads are processed
    * before submitting commands.
    */
   svga_context_flush_buffers(svga);

   /* HUD statistic: accumulate the size of the command buffer. */
   svga->hud.command_buffer_size +=
      svga->swc->get_command_buffer_size(svga->swc);

   /* Flush pending commands to hardware:
    */
   t0 = os_time_get();
   svga->swc->flush(svga->swc, &fence);
   svga->hud.flush_time += (os_time_get() - t0);

   svga->hud.num_flushes++;

   svga_screen_cache_flush(svgascreen, fence);

   /* To force the re-emission of rendertargets and texture sampler bindings on
    * the next command buffer.
    */
   svga->rebind.flags.rendertargets = TRUE;
   svga->rebind.flags.texture_samplers = TRUE;

   if (svga_have_gb_objects(svga)) {
      /* With guest-backed objects, shaders and constant buffers must also
       * be rebound after a flush.
       */
      svga->rebind.flags.constbufs = TRUE;
      svga->rebind.flags.vs = TRUE;
      svga->rebind.flags.fs = TRUE;
      svga->rebind.flags.gs = TRUE;

      if (svga_need_to_rebind_resources(svga)) {
         svga->rebind.flags.query = TRUE;
      }
   }

   /* Debug aid: with DEBUG_SYNC set, wait for the flush to complete so
    * command submission becomes synchronous.
    */
   if (SVGA_DEBUG & DEBUG_SYNC) {
      if (fence)
         svga->pipe.screen->fence_finish( svga->pipe.screen, fence,
                                          PIPE_TIMEOUT_INFINITE);
   }

   /* Hand the fence to the caller (if requested), then drop our local
    * reference.
    */
   if (pfence)
      svgascreen->sws->fence_reference(svgascreen->sws, pfence, fence);

   svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL);
}
/** * svga_reemit_fs_bindings - Reemit the fragment shader bindings */ enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga) { enum pipe_error ret; assert(svga->rebind.fs); assert(svga_have_gb_objects(svga)); if (!svga->state.hw_draw.fs) return PIPE_OK; ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, svga->state.hw_draw.fs->gb_shader); if (ret != PIPE_OK) return ret; svga->rebind.fs = FALSE; return PIPE_OK; }
/**
 * Request a transfer map to the texture resource
 *
 * Chooses between three mapping strategies:
 *  - DMA buffer upload (when GB DMA is available and direct map isn't
 *    forced),
 *  - direct map of the guest-backed surface,
 *  - the texture upload buffer (write-only maps of already-rendered-to
 *    surfaces, or as a fallback when a non-blocking direct map fails).
 *
 * \return pointer to the mapped region, or NULL on failure.
 */
static void *
svga_texture_transfer_map(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          struct pipe_transfer **ptransfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws;
   struct svga_texture *tex = svga_texture(texture);
   struct svga_transfer *st;
   struct svga_winsys_surface *surf = tex->handle;
   /* Default to direct map when GB objects exist but GB DMA doesn't. */
   boolean use_direct_map = svga_have_gb_objects(svga) &&
                            !svga_have_gb_dma(svga);
   void *map = NULL;
   int64_t begin = svga_get_time(svga);

   SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_TEXTRANSFERMAP);

   if (!surf)
      goto done;

   /* We can't map texture storage directly unless we have GB objects */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      if (svga_have_gb_objects(svga))
         use_direct_map = TRUE;
      else
         goto done;
   }

   st = CALLOC_STRUCT(svga_transfer);
   if (!st)
      goto done;

   st->base.level = level;
   st->base.usage = usage;
   st->base.box = *box;

   /* The modified transfer map box with the array index removed from z.
    * The array index is specified in slice.
    */
   st->box.x = box->x;
   st->box.y = box->y;
   st->box.z = box->z;
   st->box.w = box->width;
   st->box.h = box->height;
   st->box.d = box->depth;

   switch (tex->b.b.target) {
   case PIPE_TEXTURE_CUBE:
      st->slice = st->base.box.z;
      st->box.z = 0;   /* so we don't apply double offsets below */
      break;
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      st->slice = st->base.box.z;
      st->box.z = 0;   /* so we don't apply double offsets below */

      /* Force direct map for transfering multiple slices */
      if (st->base.box.depth > 1)
         use_direct_map = svga_have_gb_objects(svga);

      break;
   default:
      st->slice = 0;
      break;
   }

   /* Force direct map for multisample surface */
   if (texture->nr_samples > 1) {
      assert(svga_have_gb_objects(svga));
      assert(sws->have_sm4_1);
      use_direct_map = TRUE;
   }

   st->use_direct_map = use_direct_map;
   pipe_resource_reference(&st->base.resource, texture);

   /* If this is the first time mapping to the surface in this
    * command buffer, clear the dirty masks of this surface.
    */
   if (sws->surface_is_flushed(sws, surf)) {
      svga_clear_texture_dirty(tex);
   }

   if (!use_direct_map) {
      /* upload to the DMA buffer */
      map = svga_texture_transfer_map_dma(svga, st);
   }
   else {
      /* Upload buffer is only usable for write-only maps of textures that
       * support it.
       */
      boolean can_use_upload = tex->can_use_upload &&
                               !(st->base.usage & PIPE_TRANSFER_READ);
      boolean was_rendered_to =
         svga_was_texture_rendered_to(svga_texture(texture),
                                      st->slice, st->base.level);

      /* If the texture was already rendered to and upload buffer
       * is supported, then we will use upload buffer to
       * avoid the need to read back the texture content; otherwise,
       * we'll first try to map directly to the GB surface, if it is blocked,
       * then we'll try the upload buffer.
       */
      if (was_rendered_to && can_use_upload) {
         map = svga_texture_transfer_map_upload(svga, st);
      }
      else {
         unsigned orig_usage = st->base.usage;

         /* First try directly map to the GB surface */
         if (can_use_upload)
            st->base.usage |= PIPE_TRANSFER_DONTBLOCK;
         map = svga_texture_transfer_map_direct(svga, st);
         st->base.usage = orig_usage;

         if (!map && can_use_upload) {
            /* if direct map with DONTBLOCK fails, then try upload to the
             * texture upload buffer.
             */
            map = svga_texture_transfer_map_upload(svga, st);
         }
      }

      /* If upload fails, then try direct map again without forcing it
       * to DONTBLOCK.
       */
      if (!map) {
         map = svga_texture_transfer_map_direct(svga, st);
      }
   }

   if (!map) {
      FREE(st);
   }
   else {
      *ptransfer = &st->base;
      svga->hud.num_textures_mapped++;
      if (usage & PIPE_TRANSFER_WRITE) {
         /* record texture upload for HUD */
         svga->hud.num_bytes_uploaded +=
            st->base.layer_stride * st->box.d;

         /* mark this texture level as dirty */
         svga_set_texture_dirty(tex, st->slice, level);
      }
   }

done:
   svga->hud.map_buffer_time += (svga_get_time(svga) - begin);
   SVGA_STATS_TIME_POP(sws);
   (void) sws;

   return map;
}
/**
 * Get (or create/upload) the winsys surface handle so that we can
 * refer to this buffer in fifo commands.
 * This function will create the host surface, and in the GB case also the
 * hardware storage. In the non-GB case, the hardware storage will be created
 * if there are mapped ranges and the data is currently in a malloc'ed buffer.
 *
 * \return the winsys surface handle, or NULL on failure.
 */
struct svga_winsys_surface *
svga_buffer_handle(struct svga_context *svga, struct pipe_resource *buf)
{
   struct pipe_screen *screen = svga->pipe.screen;
   struct svga_screen *ss = svga_screen(screen);
   struct svga_buffer *sbuf;
   enum pipe_error ret;

   if (!buf)
      return NULL;

   sbuf = svga_buffer(buf);

   /* User buffers never get a host surface handle. */
   assert(!sbuf->user);

   if (!sbuf->handle) {
      /* This call will set sbuf->handle */
      if (svga_have_gb_objects(svga)) {
         ret = svga_buffer_update_hw(svga, sbuf);
      } else {
         ret = svga_buffer_create_host_surface(ss, sbuf);
      }
      if (ret != PIPE_OK)
         return NULL;
   }

   assert(sbuf->handle);

   /* If there are dirty mapped ranges, queue an upload for them. */
   if (sbuf->map.num_ranges) {
      if (!sbuf->dma.pending) {
         /*
          * No pending DMA upload yet, so insert a DMA upload command now.
          */

         /*
          * Migrate the data from swbuf -> hwbuf if necessary.
          */
         ret = svga_buffer_update_hw(svga, sbuf);
         if (ret == PIPE_OK) {
            /*
             * Queue a dma command.
             */

            ret = svga_buffer_upload_command(svga, sbuf);
            if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
               /* Command buffer likely full: flush and retry once. */
               svga_context_flush(svga, NULL);
               ret = svga_buffer_upload_command(svga, sbuf);
               assert(ret == PIPE_OK);
            }
            if (ret == PIPE_OK) {
               sbuf->dma.pending = TRUE;
               assert(!sbuf->head.prev && !sbuf->head.next);
               LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers);
            }
         }
         else if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
            /*
             * The buffer is too big to fit in the GMR aperture, so break it in
             * smaller pieces.
             */
            ret = svga_buffer_upload_piecewise(ss, svga, sbuf);
         }

         if (ret != PIPE_OK) {
            /*
             * Something unexpected happened above. There is very little that
             * we can do other than proceeding while ignoring the dirty ranges.
             */
            assert(0);
            sbuf->map.num_ranges = 0;
         }
      }
      else {
         /*
          * There a pending dma already. Make sure it is from this context.
          */
         assert(sbuf->dma.svga == svga);
      }
   }

   /* Either every dirty range was handled, or there is a DMA pending. */
   assert(!sbuf->map.num_ranges || sbuf->dma.pending);

   return sbuf->handle;
}
/**
 * Upload the buffer to the host in a piecewise fashion.
 *
 * Used when the buffer is too big to fit in the GMR aperture.
 * This function should never get called in the guest-backed case
 * since we always have a full-sized hardware storage backing the
 * host surface.
 *
 * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if even the
 *         smallest staging buffer cannot be allocated.
 */
static enum pipe_error
svga_buffer_upload_piecewise(struct svga_screen *ss,
                             struct svga_context *svga,
                             struct svga_buffer *sbuf)
{
   struct svga_winsys_screen *sws = ss->sws;
   const unsigned alignment = sizeof(void *);
   const unsigned usage = 0;
   unsigned i;

   assert(sbuf->map.num_ranges);
   assert(!sbuf->dma.pending);
   assert(!svga_have_gb_objects(svga));

   SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle);

   /* Upload each dirty range, splitting it into chunks small enough to
    * get a temporary hardware buffer.
    */
   for (i = 0; i < sbuf->map.num_ranges; ++i) {
      struct svga_buffer_range *range = &sbuf->map.ranges[i];
      unsigned offset = range->start;
      unsigned size = range->end - range->start;

      while (offset < range->end) {
         struct svga_winsys_buffer *hwbuf;
         uint8_t *map;
         enum pipe_error ret;

         /* Clamp the chunk to the end of the range. */
         if (offset + size > range->end)
            size = range->end - offset;

         /* Halve the chunk size until an allocation succeeds (or we run
          * out of sizes to try).
          */
         hwbuf = sws->buffer_create(sws, alignment, usage, size);
         while (!hwbuf) {
            size /= 2;
            if (!size)
               return PIPE_ERROR_OUT_OF_MEMORY;
            hwbuf = sws->buffer_create(sws, alignment, usage, size);
         }

         SVGA_DBG(DEBUG_DMA, "  bytes %u - %u\n",
                  offset, offset + size);

         map = sws->buffer_map(sws, hwbuf,
                               PIPE_TRANSFER_WRITE |
                               PIPE_TRANSFER_DISCARD_RANGE);
         assert(map);
         if (map) {
            memcpy(map, (const char *) sbuf->swbuf + offset, size);
            sws->buffer_unmap(sws, hwbuf);
         }

         ret = SVGA3D_BufferDMA(svga->swc,
                                hwbuf, sbuf->handle,
                                SVGA3D_WRITE_HOST_VRAM,
                                size, 0, offset, sbuf->dma.flags);
         if (ret != PIPE_OK) {
            /* Command buffer likely full: flush and retry once. */
            svga_context_flush(svga, NULL);
            ret = SVGA3D_BufferDMA(svga->swc,
                                   hwbuf, sbuf->handle,
                                   SVGA3D_WRITE_HOST_VRAM,
                                   size, 0, offset, sbuf->dma.flags);
            assert(ret == PIPE_OK);
         }

         /* Only the first chunk may discard the previous contents. */
         sbuf->dma.flags.discard = FALSE;

         sws->buffer_destroy(sws, hwbuf);

         offset += size;
      }
   }

   sbuf->map.num_ranges = 0;

   return PIPE_OK;
}
/**
 * Insert a number of preliminary UPDATE_GB_IMAGE commands in the
 * command buffer, equal to the current number of mapped ranges.
 * The UPDATE_GB_IMAGE commands will be patched with the
 * actual ranges just before flush.
 *
 * If the buffer contents are being discarded, a single
 * INVALIDATE_GB_IMAGE command is emitted in front of the update
 * commands, all reserved in one FIFO allocation.
 *
 * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if FIFO space
 *         cannot be reserved.
 */
static enum pipe_error
svga_buffer_upload_gb_command(struct svga_context *svga,
                              struct svga_buffer *sbuf)
{
   struct svga_winsys_context *swc = svga->swc;
   SVGA3dCmdUpdateGBImage *update_cmd;
   struct svga_3d_update_gb_image *whole_update_cmd = NULL;
   uint32 numBoxes = sbuf->map.num_ranges;
   struct pipe_resource *dummy;
   unsigned i;

   assert(svga_have_gb_objects(svga));
   assert(numBoxes);
   assert(sbuf->dma.updates == NULL);

   if (sbuf->dma.flags.discard) {
      struct svga_3d_invalidate_gb_image *cicmd = NULL;
      SVGA3dCmdInvalidateGBImage *invalidate_cmd;
      const unsigned total_commands_size =
         sizeof(*invalidate_cmd) + numBoxes * sizeof(*whole_update_cmd);

      /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by
       * 'numBoxes' UPDATE_GB_IMAGE commands.  Allocate all at once rather
       * than with separate commands because we need to properly deal with
       * filling the command buffer.
       */
      invalidate_cmd = SVGA3D_FIFOReserve(swc,
                                          SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
                                          total_commands_size, 1 + numBoxes);
      if (!invalidate_cmd)
         return PIPE_ERROR_OUT_OF_MEMORY;

      /* Recover the command-with-header wrapper to fill in its size. */
      cicmd = container_of(invalidate_cmd, cicmd, body);
      cicmd->header.size = sizeof(*invalidate_cmd);
      swc->surface_relocation(swc, &invalidate_cmd->image.sid, NULL,
                              sbuf->handle,
                              (SVGA_RELOC_WRITE |
                               SVGA_RELOC_INTERNAL |
                               SVGA_RELOC_DMA));
      invalidate_cmd->image.face = 0;
      invalidate_cmd->image.mipmap = 0;

      /* The whole_update_command is a SVGA3dCmdHeader plus the
       * SVGA3dCmdUpdateGBImage command.
       */
      whole_update_cmd =
         (struct svga_3d_update_gb_image *) &invalidate_cmd[1];
      /* initialize the first UPDATE_GB_IMAGE command */
      whole_update_cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
      update_cmd = &whole_update_cmd->body;
   }
   else {
      /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */
      const unsigned total_commands_size =
         sizeof(*update_cmd) + (numBoxes - 1) * sizeof(*whole_update_cmd);

      update_cmd = SVGA3D_FIFOReserve(swc,
                                      SVGA_3D_CMD_UPDATE_GB_IMAGE,
                                      total_commands_size, numBoxes);
      if (!update_cmd)
         return PIPE_ERROR_OUT_OF_MEMORY;

      /* The whole_update_command is a SVGA3dCmdHeader plus the
       * SVGA3dCmdUpdateGBImage command.
       */
      whole_update_cmd = container_of(update_cmd, whole_update_cmd, body);
   }

   /* Init the first UPDATE_GB_IMAGE command */
   whole_update_cmd->header.size = sizeof(*update_cmd);
   swc->surface_relocation(swc, &update_cmd->image.sid, NULL, sbuf->handle,
                           SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
   update_cmd->image.face = 0;
   update_cmd->image.mipmap = 0;

   /* Save pointer to the first UPDATE_GB_IMAGE command so that we can
    * fill in the box info below.
    */
   sbuf->dma.updates = whole_update_cmd;

   /*
    * Copy the face, mipmap, etc. info to all subsequent commands.
    * Also do the surface relocation for each subsequent command.
    */
   for (i = 1; i < numBoxes; ++i) {
      whole_update_cmd++;
      memcpy(whole_update_cmd, sbuf->dma.updates,
             sizeof(*whole_update_cmd));

      swc->surface_relocation(swc, &whole_update_cmd->body.image.sid, NULL,
                              sbuf->handle,
                              SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
   }

   /* Increment reference count */
   sbuf->dma.svga = svga;
   dummy = NULL;
   pipe_resource_reference(&dummy, &sbuf->b.b);

   SVGA_FIFOCommitAll(swc);

   swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
   sbuf->dma.flags.discard = FALSE;

   svga->hud.num_resource_updates++;

   return PIPE_OK;
}
/**
 * Variant of SVGA3D_BufferDMA which leaves the copy box temporarily in blank.
 *
 * The SURFACE_DMA command is reserved with room for 'numBoxes' copy boxes
 * plus a suffix; the boxes themselves are filled in later (sbuf->dma.boxes
 * points at them), just before the command buffer is flushed.
 *
 * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if FIFO space
 *         cannot be reserved.
 */
static enum pipe_error
svga_buffer_upload_command(struct svga_context *svga,
                           struct svga_buffer *sbuf)
{
   struct svga_winsys_context *swc = svga->swc;
   struct svga_winsys_buffer *guest = sbuf->hwbuf;
   struct svga_winsys_surface *host = sbuf->handle;
   SVGA3dTransferType transfer = SVGA3D_WRITE_HOST_VRAM;
   SVGA3dCmdSurfaceDMA *cmd;
   uint32 numBoxes = sbuf->map.num_ranges;
   SVGA3dCopyBox *boxes;
   SVGA3dCmdSurfaceDMASuffix *pSuffix;
   unsigned region_flags;
   unsigned surface_flags;
   struct pipe_resource *dummy;

   /* Guest-backed objects take a different upload path entirely. */
   if (svga_have_gb_objects(svga))
      return svga_buffer_upload_gb_command(svga, sbuf);

   /* Pick relocation flags matching the DMA direction. */
   if (transfer == SVGA3D_WRITE_HOST_VRAM) {
      region_flags = SVGA_RELOC_READ;
      surface_flags = SVGA_RELOC_WRITE;
   }
   else if (transfer == SVGA3D_READ_HOST_VRAM) {
      region_flags = SVGA_RELOC_WRITE;
      surface_flags = SVGA_RELOC_READ;
   }
   else {
      assert(0);
      return PIPE_ERROR_BAD_INPUT;
   }

   assert(numBoxes);

   /* Reserve command + boxes + suffix in one allocation. */
   cmd = SVGA3D_FIFOReserve(swc,
                            SVGA_3D_CMD_SURFACE_DMA,
                            sizeof *cmd + numBoxes * sizeof *boxes +
                            sizeof *pSuffix,
                            2);
   if (!cmd)
      return PIPE_ERROR_OUT_OF_MEMORY;

   swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags);
   cmd->guest.pitch = 0;

   swc->surface_relocation(swc, &cmd->host.sid, NULL, host, surface_flags);
   cmd->host.face = 0;
   cmd->host.mipmap = 0;

   cmd->transfer = transfer;

   /* Remember where the (still blank) copy boxes live so they can be
    * patched before flush.
    */
   sbuf->dma.boxes = (SVGA3dCopyBox *)&cmd[1];
   sbuf->dma.svga = svga;

   /* Increment reference count */
   dummy = NULL;
   pipe_resource_reference(&dummy, &sbuf->b.b);

   /* The suffix follows the copy boxes at the end of the command. */
   pSuffix = (SVGA3dCmdSurfaceDMASuffix *)
      ((uint8_t*)cmd + sizeof *cmd + numBoxes * sizeof *boxes);
   pSuffix->suffixSize = sizeof *pSuffix;
   pSuffix->maximumOffset = sbuf->b.b.width0;
   pSuffix->flags = sbuf->dma.flags;

   SVGA_FIFOCommitAll(swc);

   sbuf->dma.flags.discard = FALSE;

   return PIPE_OK;
}
/**
 * Create a buffer transfer.
 *
 * Unlike texture DMAs (which are written immediately to the command buffer and
 * therefore inherently serialized with other context operations), for buffers
 * we try to coalesce multiple range mappings (i.e, multiple calls to this
 * function) into a single DMA command, for better efficiency in command
 * processing. This means we need to exercise extra care here to ensure that
 * the end result is exactly the same as if one DMA was used for every mapped
 * range.
 *
 * \return pointer to the mapped range, or NULL on failure.
 */
static void *
svga_buffer_transfer_map(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level,
                         unsigned usage,
                         const struct pipe_box *box,
                         struct pipe_transfer **ptransfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_buffer *sbuf = svga_buffer(resource);
   struct pipe_transfer *transfer;
   uint8_t *map = NULL;
   int64_t begin = svga_get_time(svga);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERMAP);

   /* Buffers are one-dimensional. */
   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);

   transfer = MALLOC_STRUCT(pipe_transfer);
   if (!transfer) {
      goto done;
   }

   transfer->resource = resource;
   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;
   transfer->stride = 0;
   transfer->layer_stride = 0;

   if (usage & PIPE_TRANSFER_WRITE) {
      /* If we write to the buffer for any reason, free any saved translated
       * vertices.
       */
      pipe_resource_reference(&sbuf->translated_indices.buffer, NULL);
   }

   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
      enum pipe_error ret;

      /* Host-side buffers can only be dirtied with vgpu10 features
       * (streamout and buffer copy).
       */
      assert(svga_have_vgpu10(svga));

      if (!sbuf->user) {
         /* Ensure there is a host surface to read back from. */
         (void) svga_buffer_handle(svga, resource, sbuf->bind_flags);
      }

      if (sbuf->dma.pending) {
         /* Flush the pending upload and wait for it before reading back. */
         svga_buffer_upload_flush(svga, sbuf);
         svga_context_finish(svga);
      }

      assert(sbuf->handle);

      ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
      if (ret != PIPE_OK) {
         /* Command buffer likely full: flush and retry once. */
         svga_context_flush(svga, NULL);
         ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc,
                                                 sbuf->handle, 0);
         assert(ret == PIPE_OK);
      }

      svga->hud.num_readbacks++;

      /* Wait for the readback to finish before handing out the map. */
      svga_context_finish(svga);

      sbuf->dirty = FALSE;
   }

   if (usage & PIPE_TRANSFER_WRITE) {
      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /*
          * Flush any pending primitives, finish writing any pending DMA
          * commands, and tell the host to discard the buffer contents on
          * the next DMA operation.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            /*
             * Instead of flushing the context command buffer, simply discard
             * the current hwbuf, and start a new one.
             * With GB objects, the map operation takes care of this
             * if passed the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag,
             * and the old backing store is busy.
             */

            if (!svga_have_gb_objects(svga))
               svga_buffer_destroy_hw_storage(ss, sbuf);
         }

         sbuf->map.num_ranges = 0;
         sbuf->dma.flags.discard = TRUE;
      }

      if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         if (!sbuf->map.num_ranges) {
            /*
             * No pending ranges to upload so far, so we can tell the host to
             * not synchronize on the next DMA command.
             */

            sbuf->dma.flags.unsynchronized = TRUE;
         }
      }
      else {
         /*
          * Synchronizing, so flush any pending primitives, finish writing any
          * pending DMA command, and ensure the next DMA will be done in order.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            if (svga_buffer_has_hw_storage(sbuf)) {
               /*
                * We have a pending DMA upload from a hardware buffer, therefore
                * we need to ensure that the host finishes processing that DMA
                * command before the state tracker can start overwriting the
                * hardware buffer.
                *
                * XXX: This could be avoided by tying the hardware buffer to
                * the transfer (just as done with textures), which would allow
                * overlapping DMAs commands to be queued on the same context
                * buffer. However, due to the likelihood of software vertex
                * processing, it is more convenient to hold on to the hardware
                * buffer, allowing to quickly access the contents from the CPU
                * without having to do a DMA download from the host.
                */

               if (usage & PIPE_TRANSFER_DONTBLOCK) {
                  /*
                   * Flushing the command buffer here will most likely cause
                   * the map of the hwbuf below to block, so preemptively
                   * return NULL here if DONTBLOCK is set to prevent unnecessary
                   * command buffer flushes.
                   */

                  FREE(transfer);
                  goto done;
               }

               svga_context_flush(svga, NULL);
            }
         }

         sbuf->dma.flags.unsynchronized = FALSE;
      }
   }

   if (!sbuf->swbuf && !svga_buffer_has_hw_storage(sbuf)) {
      if (svga_buffer_create_hw_storage(ss, sbuf, sbuf->bind_flags) !=
          PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
          * buffer instead.
          */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
                         "splitting DMA transfers\n",
                         __FUNCTION__,
                         (sbuf->b.b.width0 + 1023)/1024);
         }

         sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
         if (!sbuf->swbuf) {
            FREE(transfer);
            goto done;
         }
      }
   }

   if (sbuf->swbuf) {
      /* User/malloc buffer */
      map = sbuf->swbuf;
   }
   else if (svga_buffer_has_hw_storage(sbuf)) {
      boolean retry;

      map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      if (map == NULL && retry) {
         /*
          * At this point, svga_buffer_get_transfer() has already
          * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL
          * for this buffer.
          */
         svga_context_flush(svga, NULL);
         map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage,
                                          &retry);
      }
   }
   else {
      map = NULL;
   }

   if (map) {
      ++sbuf->map.count;
      /* Offset into the mapping so the caller sees the requested range. */
      map += transfer->box.x;
      *ptransfer = transfer;
   }
   else {
      FREE(transfer);
   }

   svga->hud.map_buffer_time += (svga_get_time(svga) - begin);

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return map;
}
/* * Check and emit a range of shader constant registers, trying to coalesce * successive shader constant updates in a single command in order to save * space on the command buffer. This is a HWv8 feature. */ static enum pipe_error emit_const_range(struct svga_context *svga, unsigned shader, unsigned offset, unsigned count, const float (*values)[4]) { unsigned i, j; enum pipe_error ret; assert(shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_FRAGMENT); assert(!svga_have_vgpu10(svga)); #ifdef DEBUG if (offset + count > SVGA3D_CONSTREG_MAX) { debug_printf("svga: too many constants (offset %u + count %u = %u (max = %u))\n", offset, count, offset + count, SVGA3D_CONSTREG_MAX); } #endif if (offset > SVGA3D_CONSTREG_MAX) { /* This isn't OK, but if we propagate an error all the way up we'll * just get into more trouble. * XXX note that offset is always zero at this time so this is moot. */ return PIPE_OK; } if (offset + count > SVGA3D_CONSTREG_MAX) { /* Just drop the extra constants for now. * Ideally we should not have allowed the app to create a shader * that exceeds our constant buffer size but there's no way to * express that in gallium at this time. */ count = SVGA3D_CONSTREG_MAX - offset; } i = 0; while (i < count) { if (memcmp(svga->state.hw_draw.cb[shader][offset + i], values[i], 4 * sizeof(float)) != 0) { /* Found one dirty constant */ if (SVGA_DEBUG & DEBUG_CONSTS) debug_printf("%s %s %d: %f %f %f %f\n", __FUNCTION__, shader == PIPE_SHADER_VERTEX ? "VERT" : "FRAG", offset + i, values[i][0], values[i][1], values[i][2], values[i][3]); /* Look for more consecutive dirty constants. */ j = i + 1; while (j < count && j < i + MAX_CONST_REG_COUNT && memcmp(svga->state.hw_draw.cb[shader][offset + j], values[j], 4 * sizeof(float)) != 0) { if (SVGA_DEBUG & DEBUG_CONSTS) debug_printf("%s %s %d: %f %f %f %f\n", __FUNCTION__, shader == PIPE_SHADER_VERTEX ? 
"VERT" : "FRAG", offset + j, values[j][0], values[j][1], values[j][2], values[j][3]); ++j; } assert(j >= i + 1); /* Send them all together. */ if (svga_have_gb_objects(svga)) { ret = SVGA3D_SetGBShaderConstsInline(svga->swc, offset + i, /* start */ j - i, /* count */ svga_shader_type(shader), SVGA3D_CONST_TYPE_FLOAT, values + i); } else { ret = SVGA3D_SetShaderConsts(svga->swc, offset + i, j - i, svga_shader_type(shader), SVGA3D_CONST_TYPE_FLOAT, values + i); } if (ret != PIPE_OK) { return ret; } /* * Local copy of the hardware state. */ memcpy(svga->state.hw_draw.cb[shader][offset + i], values[i], (j - i) * 4 * sizeof(float)); i = j + 1; svga->hud.num_const_updates++; } else { ++i; } } return PIPE_OK; }
/**
 * Map a texture transfer (legacy path).
 *
 * Either stages the data through a DMA buffer (optionally split into
 * multiple DMA transfers with a malloc'ed bounce buffer when the hardware
 * buffer is too small), or maps the guest-backed surface directly,
 * issuing a readback first when existing contents must be preserved.
 *
 * \return pointer to the mapped region, or NULL on failure.
 */
static void *
svga_texture_transfer_map(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          struct pipe_transfer **ptransfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_winsys_screen *sws = ss->sws;
   struct svga_texture *tex = svga_texture(texture);
   struct svga_transfer *st;
   unsigned nblocksx, nblocksy;
   /* Default to direct map when GB objects exist but GB DMA doesn't. */
   boolean use_direct_map = svga_have_gb_objects(svga) &&
                            !svga_have_gb_dma(svga);
   unsigned d;
   void *returnVal;
   int64_t begin = os_time_get();

   /* We can't map texture storage directly unless we have GB objects */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      if (svga_have_gb_objects(svga))
         use_direct_map = TRUE;
      else
         return NULL;
   }

   st = CALLOC_STRUCT(svga_transfer);
   if (!st)
      return NULL;

   {
      unsigned w, h;
      if (use_direct_map) {
         /* we'll directly access the guest-backed surface */
         w = u_minify(texture->width0, level);
         h = u_minify(texture->height0, level);
         d = u_minify(texture->depth0, level);
      }
      else {
         /* we'll put the data into a tightly packed buffer */
         w = box->width;
         h = box->height;
         d = box->depth;
      }
      /* Convert pixel dimensions to (compressed) block dimensions. */
      nblocksx = util_format_get_nblocksx(texture->format, w);
      nblocksy = util_format_get_nblocksy(texture->format, h);
   }

   pipe_resource_reference(&st->base.resource, texture);

   st->base.level = level;
   st->base.usage = usage;
   st->base.box = *box;
   st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
   st->base.layer_stride = st->base.stride * nblocksy;

   switch (tex->b.b.target) {
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_1D_ARRAY:
      st->slice = st->base.box.z;
      st->base.box.z = 0;   /* so we don't apply double offsets below */
      break;
   default:
      st->slice = 0;
      break;
   }

   if (usage & PIPE_TRANSFER_WRITE) {
      /* record texture upload for HUD */
      svga->hud.num_bytes_uploaded +=
         nblocksx * nblocksy * d *
         util_format_get_blocksize(texture->format);
   }

   if (!use_direct_map) {
      /* Use a DMA buffer */
      st->hw_nblocksy = nblocksy;

      /* Try to allocate a hardware buffer for the whole transfer, halving
       * the row count on each failure until an allocation succeeds.
       */
      st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
                                            st->hw_nblocksy *
                                            st->base.stride * d);
      while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
         st->hwbuf = svga_winsys_buffer_create(svga, 1, 0,
                                               st->hw_nblocksy *
                                               st->base.stride * d);
      }

      if (!st->hwbuf) {
         FREE(st);
         return NULL;
      }

      if (st->hw_nblocksy < nblocksy) {
         /* We couldn't allocate a hardware buffer big enough for the transfer,
          * so allocate regular malloc memory instead
          */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
                         "splitting into %u x %u KB DMA transfers\n",
                         __FUNCTION__,
                         (nblocksy*st->base.stride + 1023)/1024,
                         (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
                         (st->hw_nblocksy*st->base.stride + 1023)/1024);
         }

         st->swbuf = MALLOC(nblocksy * st->base.stride * d);
         if (!st->swbuf) {
            sws->buffer_destroy(sws, st->hwbuf);
            FREE(st);
            return NULL;
         }
      }

      if (usage & PIPE_TRANSFER_READ) {
         /* Pull the current contents down from the host first. */
         SVGA3dSurfaceDMAFlags flags;
         memset(&flags, 0, sizeof flags);
         svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM, flags);
      }
   }
   else {
      struct pipe_transfer *transfer = &st->base;
      struct svga_winsys_surface *surf = tex->handle;

      if (!surf) {
         FREE(st);
         return NULL;
      }

      if (need_tex_readback(transfer)) {
         enum pipe_error ret;

         svga_surfaces_flush(svga);

         if (svga_have_vgpu10(svga)) {
            ret = readback_image_vgpu10(svga, surf, st->slice,
                                        transfer->level,
                                        tex->b.b.last_level + 1);
         } else {
            ret = readback_image_vgpu9(svga, surf, st->slice,
                                       transfer->level);
         }

         assert(ret == PIPE_OK);
         (void) ret;

         /* Wait for the readback before mapping. */
         svga_context_flush(svga, NULL);

         /*
          * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified
          * we could potentially clear the flag for all faces/layers/mips.
          */
         svga_clear_texture_rendered_to(tex, st->slice, transfer->level);
      }
      else {
         assert(transfer->usage & PIPE_TRANSFER_WRITE);
         if ((transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) == 0) {
            /* Make sure any prior commands touching this surface have
             * been submitted before mapping it.
             */
            svga_surfaces_flush(svga);
            if (!sws->surface_is_flushed(sws, surf))
               svga_context_flush(svga, NULL);
         }
      }
   }

   st->use_direct_map = use_direct_map;

   *ptransfer = &st->base;

   /*
    * Begin mapping code
    */
   if (st->swbuf) {
      returnVal = st->swbuf;
   }
   else if (!st->use_direct_map) {
      returnVal = sws->buffer_map(sws, st->hwbuf, usage);
   }
   else {
      SVGA3dSize baseLevelSize;
      struct svga_texture *tex = svga_texture(texture);
      struct svga_winsys_surface *surf = tex->handle;
      uint8_t *map;
      boolean retry;
      unsigned offset, mip_width, mip_height;
      unsigned xoffset = st->base.box.x;
      unsigned yoffset = st->base.box.y;
      unsigned zoffset = st->base.box.z;

      map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
      if (map == NULL && retry) {
         /*
          * At this point, the svga_surfaces_flush() should already have
          * called in svga_texture_get_transfer().
          */
         svga_context_flush(svga, NULL);
         map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
      }

      /*
       * Make sure we return NULL if the map fails
       */
      if (!map) {
         FREE(st);
         return map;
      }

      /**
       * Compute the offset to the specific texture slice in the buffer.
       */
      baseLevelSize.width = tex->b.b.width0;
      baseLevelSize.height = tex->b.b.height0;
      baseLevelSize.depth = tex->b.b.depth0;

      offset = svga3dsurface_get_image_offset(tex->key.format,
                                              baseLevelSize,
                                              tex->b.b.last_level + 1, /* numMips */
                                              st->slice, level);
      if (level > 0) {
         assert(offset > 0);
      }

      mip_width = u_minify(tex->b.b.width0, level);
      mip_height = u_minify(tex->b.b.height0, level);

      offset += svga3dsurface_get_pixel_offset(tex->key.format,
                                               mip_width, mip_height,
                                               xoffset, yoffset, zoffset);

      returnVal = (void *) (map + offset);
   }

   svga->hud.map_buffer_time += (os_time_get() - begin);
   svga->hud.num_resources_mapped++;

   return returnVal;
}
static void svga_texture_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st = svga_transfer(transfer); struct svga_texture *tex = svga_texture(transfer->resource); if (!st->swbuf) { if (st->use_direct_map) { svga_texture_surface_unmap(svga, transfer); } else { sws->buffer_unmap(sws, st->hwbuf); } } if (!st->use_direct_map && (st->base.usage & PIPE_TRANSFER_WRITE)) { /* Use DMA to transfer texture data */ SVGA3dSurfaceDMAFlags flags; memset(&flags, 0, sizeof flags); if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { flags.discard = TRUE; } if (transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) { flags.unsynchronized = TRUE; } svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags); } else if (transfer->usage & PIPE_TRANSFER_WRITE) { struct svga_winsys_surface *surf = svga_texture(transfer->resource)->handle; SVGA3dBox box; enum pipe_error ret; assert(svga_have_gb_objects(svga)); /* update the effected region */ box.x = transfer->box.x; box.y = transfer->box.y; switch (tex->b.b.target) { case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_2D_ARRAY: box.z = 0; break; case PIPE_TEXTURE_1D_ARRAY: box.y = box.z = 0; break; default: box.z = transfer->box.z; break; } box.w = transfer->box.width; box.h = transfer->box.height; box.d = transfer->box.depth; if (0) debug_printf("%s %d, %d, %d %d x %d x %d\n", __FUNCTION__, box.x, box.y, box.z, box.w, box.h, box.d); if (svga_have_vgpu10(svga)) { ret = update_image_vgpu10(svga, surf, &box, st->slice, transfer->level, tex->b.b.last_level + 1); } else { ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level); } assert(ret == PIPE_OK); (void) ret; } ss->texture_timestamp++; svga_age_texture_view(tex, transfer->level); if (transfer->resource->target == PIPE_TEXTURE_CUBE) svga_define_texture_level(tex, st->slice, transfer->level); 
else svga_define_texture_level(tex, 0, transfer->level); pipe_resource_reference(&st->base.resource, NULL); FREE(st->swbuf); if (!st->use_direct_map) { sws->buffer_destroy(sws, st->hwbuf); } FREE(st); }
/** * Patch up the upload DMA command reserved by svga_buffer_upload_command * with the final ranges. */ void svga_buffer_upload_flush(struct svga_context *svga, struct svga_buffer *sbuf) { unsigned i; struct pipe_resource *dummy; if (!sbuf->dma.pending) { //debug_printf("no dma pending on buffer\n"); return; } assert(sbuf->handle); assert(sbuf->map.num_ranges); assert(sbuf->dma.svga == svga); /* * Patch the DMA/update command with the final copy box. */ if (svga_have_gb_objects(svga)) { struct svga_3d_update_gb_image *update = sbuf->dma.updates; assert(update); for (i = 0; i < sbuf->map.num_ranges; ++i, ++update) { SVGA3dBox *box = &update->body.box; SVGA_DBG(DEBUG_DMA, " bytes %u - %u\n", sbuf->map.ranges[i].start, sbuf->map.ranges[i].end); box->x = sbuf->map.ranges[i].start; box->y = 0; box->z = 0; box->w = sbuf->map.ranges[i].end - sbuf->map.ranges[i].start; box->h = 1; box->d = 1; assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); svga->hud.num_bytes_uploaded += box->w; } } else { assert(sbuf->hwbuf); assert(sbuf->dma.boxes); SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle); for (i = 0; i < sbuf->map.num_ranges; ++i) { SVGA3dCopyBox *box = sbuf->dma.boxes + i; SVGA_DBG(DEBUG_DMA, " bytes %u - %u\n", sbuf->map.ranges[i].start, sbuf->map.ranges[i].end); box->x = sbuf->map.ranges[i].start; box->y = 0; box->z = 0; box->w = sbuf->map.ranges[i].end - sbuf->map.ranges[i].start; box->h = 1; box->d = 1; box->srcx = sbuf->map.ranges[i].start; box->srcy = 0; box->srcz = 0; assert(box->x <= sbuf->b.b.width0); assert(box->x + box->w <= sbuf->b.b.width0); svga->hud.num_bytes_uploaded += box->w; } } /* Reset sbuf for next use/upload */ sbuf->map.num_ranges = 0; assert(sbuf->head.prev && sbuf->head.next); LIST_DEL(&sbuf->head); /* remove from svga->dirty_buffers list */ #ifdef DEBUG sbuf->head.next = sbuf->head.prev = NULL; #endif sbuf->dma.pending = FALSE; sbuf->dma.flags.discard = FALSE; sbuf->dma.flags.unsynchronized = FALSE; 
sbuf->dma.svga = NULL; sbuf->dma.boxes = NULL; sbuf->dma.updates = NULL; /* Decrement reference count (and potentially destroy) */ dummy = &sbuf->b.b; pipe_resource_reference(&dummy, NULL); }