/**
 * Flush any pending DMA uploads for all buffers on this context's dirty list.
 */
void
svga_context_flush_buffers(struct svga_context *svga)
{
   struct list_head *curr, *next;
   struct svga_buffer *sbuf;

   curr = svga->dirty_buffers.next;
   next = curr->next;
   while (curr != &svga->dirty_buffers) {
      sbuf = LIST_ENTRY(struct svga_buffer, curr, head);

      assert(p_atomic_read(&sbuf->b.b.reference.count) != 0);
      assert(sbuf->dma.pending);

      svga_buffer_upload_flush(svga, sbuf);

      /* svga_buffer_upload_flush() unlinks sbuf from the dirty list, so
       * advance using the pointer saved before the call.
       */
      curr = next;
      next = curr->next;
   }
}
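/*
 * The loop above fetches the next pointer before visiting the current node:
 * svga_buffer_upload_flush() unlinks the buffer from the dirty list, so
 * curr->next must not be read after the call.  A minimal sketch of the same
 * idiom with a hypothetical visit() callback (illustrative only, not part of
 * the driver):
 */
#if 0
curr = list->next;
next = curr->next;
while (curr != list) {
   visit(curr);       /* may unlink curr from the list */
   curr = next;       /* safe: saved before visit() ran */
   next = curr->next;
}
#endif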
/**
 * Note a dirty range.
 *
 * This function only notes the range down. It doesn't actually emit a DMA
 * upload command. That only happens when a context tries to refer to this
 * buffer, and the DMA upload command is added to that context's command
 * buffer.
 *
 * We try to lump as many contiguous DMA transfers together as possible.
 */
void
svga_buffer_add_range(struct svga_buffer *sbuf, unsigned start, unsigned end)
{
   unsigned i;
   unsigned nearest_range;
   unsigned nearest_dist;

   assert(end > start);

   if (sbuf->map.num_ranges < SVGA_BUFFER_MAX_RANGES) {
      nearest_range = sbuf->map.num_ranges;
      nearest_dist = ~0;
   } else {
      nearest_range = SVGA_BUFFER_MAX_RANGES - 1;
      nearest_dist = 0;
   }

   /*
    * Try to grow one of the ranges.
    */
   for (i = 0; i < sbuf->map.num_ranges; ++i) {
      int left_dist;
      int right_dist;
      int dist;

      left_dist = start - sbuf->map.ranges[i].end;
      right_dist = sbuf->map.ranges[i].start - end;
      dist = MAX2(left_dist, right_dist);

      if (dist <= 0) {
         /*
          * Ranges are contiguous or overlapping -- extend this one and
          * return.
          *
          * Note that it is not this function's task to prevent overlapping
          * ranges, as the GMR was already given so it is too late to do
          * anything. If the ranges overlap here it must surely be because
          * PIPE_TRANSFER_UNSYNCHRONIZED was set.
          */
         sbuf->map.ranges[i].start = MIN2(sbuf->map.ranges[i].start, start);
         sbuf->map.ranges[i].end = MAX2(sbuf->map.ranges[i].end, end);
         return;
      } else {
         /*
          * Discontiguous ranges -- keep track of the nearest range.
          */
         if (dist < nearest_dist) {
            nearest_range = i;
            nearest_dist = dist;
         }
      }
   }

   /*
    * We cannot add a new range to an existing DMA command, so patch-up the
    * pending DMA upload and start clean.
    */
   svga_buffer_upload_flush(sbuf->dma.svga, sbuf);

   assert(!sbuf->dma.pending);
   assert(!sbuf->dma.svga);
   assert(!sbuf->dma.boxes);

   if (sbuf->map.num_ranges < SVGA_BUFFER_MAX_RANGES) {
      /*
       * Add a new range.
       */
      sbuf->map.ranges[sbuf->map.num_ranges].start = start;
      sbuf->map.ranges[sbuf->map.num_ranges].end = end;
      ++sbuf->map.num_ranges;
   } else {
      /*
       * Everything else failed, so just extend the nearest range.
       *
       * It is OK to do this because we always keep a local copy of the
       * host buffer data, for SW TNL, and the host never modifies the
       * buffer.
       */
      assert(nearest_range < SVGA_BUFFER_MAX_RANGES);
      assert(nearest_range < sbuf->map.num_ranges);
      sbuf->map.ranges[nearest_range].start =
         MIN2(sbuf->map.ranges[nearest_range].start, start);
      sbuf->map.ranges[nearest_range].end =
         MAX2(sbuf->map.ranges[nearest_range].end, end);
   }
}
/**
 * Note a dirty range.
 *
 * This function only notes the range down. It doesn't actually emit a DMA
 * upload command. That only happens when a context tries to refer to this
 * buffer, and the DMA upload command is added to that context's command
 * buffer.
 *
 * We try to lump as many contiguous DMA transfers together as possible.
 */
void
svga_buffer_add_range(struct svga_buffer *sbuf, unsigned start, unsigned end)
{
   unsigned i;
   unsigned nearest_range;
   unsigned nearest_dist;

   assert(end > start);

   if (sbuf->map.num_ranges < SVGA_BUFFER_MAX_RANGES) {
      nearest_range = sbuf->map.num_ranges;
      nearest_dist = ~0;
   } else {
      nearest_range = SVGA_BUFFER_MAX_RANGES - 1;
      nearest_dist = 0;
   }

   /*
    * Try to grow one of the ranges.
    *
    * Note that it is not this function's task to care about overlapping
    * ranges, as the GMR was already given so it is too late to do anything.
    * Situations where overlapping ranges may pose a problem should be
    * detected via pipe_context::is_resource_referenced and the context that
    * refers to the buffer should be flushed.
    */
   for (i = 0; i < sbuf->map.num_ranges; ++i) {
      int left_dist;
      int right_dist;
      int dist;

      left_dist = start - sbuf->map.ranges[i].end;
      right_dist = sbuf->map.ranges[i].start - end;
      dist = MAX2(left_dist, right_dist);

      if (dist <= 0) {
         /*
          * Ranges are contiguous or overlapping -- extend this one and
          * return.
          */
         sbuf->map.ranges[i].start = MIN2(sbuf->map.ranges[i].start, start);
         sbuf->map.ranges[i].end = MAX2(sbuf->map.ranges[i].end, end);
         return;
      } else {
         /*
          * Discontiguous ranges -- keep track of the nearest range.
          */
         if (dist < nearest_dist) {
            nearest_range = i;
            nearest_dist = dist;
         }
      }
   }

   /*
    * We cannot add a new range to an existing DMA command, so patch-up the
    * pending DMA upload and start clean.
    */
   if (sbuf->dma.pending)
      svga_buffer_upload_flush(sbuf->dma.svga, sbuf);

   assert(!sbuf->dma.pending);
   assert(!sbuf->dma.svga);
   assert(!sbuf->dma.boxes);

   if (sbuf->map.num_ranges < SVGA_BUFFER_MAX_RANGES) {
      /*
       * Add a new range.
       */
      sbuf->map.ranges[sbuf->map.num_ranges].start = start;
      sbuf->map.ranges[sbuf->map.num_ranges].end = end;
      ++sbuf->map.num_ranges;
   } else {
      /*
       * Everything else failed, so just extend the nearest range.
       *
       * It is OK to do this because we always keep a local copy of the
       * host buffer data, for SW TNL, and the host never modifies the
       * buffer.
       */
      assert(nearest_range < SVGA_BUFFER_MAX_RANGES);
      assert(nearest_range < sbuf->map.num_ranges);
      sbuf->map.ranges[nearest_range].start =
         MIN2(sbuf->map.ranges[nearest_range].start, start);
      sbuf->map.ranges[nearest_range].end =
         MAX2(sbuf->map.ranges[nearest_range].end, end);
   }
}
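/*
 * Illustrative trace of the coalescing behavior above (not part of the
 * driver), assuming an initialized sbuf with no ranges recorded, no DMA
 * pending, and SVGA_BUFFER_MAX_RANGES > 2:
 */
#if 0
svga_buffer_add_range(sbuf,  0, 16);  /* ranges: [0,16)                     */
svga_buffer_add_range(sbuf, 16, 32);  /* contiguous: grows it to [0,32)     */
svga_buffer_add_range(sbuf, 64, 80);  /* disjoint: appends [64,80)          */
svga_buffer_add_range(sbuf, 24, 72);  /* touches [0,32): grows it to [0,72);
                                       * [64,80) is left as-is, so recorded
                                       * ranges may overlap, as noted in the
                                       * comments above */
#endif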
/**
 * Create a buffer transfer.
 *
 * Unlike texture DMAs (which are written immediately to the command buffer
 * and therefore inherently serialized with other context operations), for
 * buffers we try to coalesce multiple range mappings (i.e., multiple calls
 * to this function) into a single DMA command, for better efficiency in
 * command processing. This means we need to exercise extra care here to
 * ensure that the end result is exactly the same as if one DMA was used for
 * every mapped range.
 */
static struct pipe_transfer *
svga_buffer_get_transfer(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level,
                         unsigned usage,
                         const struct pipe_box *box)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_buffer *sbuf = svga_buffer(resource);
   struct pipe_transfer *transfer;

   transfer = CALLOC_STRUCT(pipe_transfer);
   if (transfer == NULL) {
      return NULL;
   }

   transfer->resource = resource;
   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;

   if (usage & PIPE_TRANSFER_WRITE) {
      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /*
          * Flush any pending primitives, finish writing any pending DMA
          * commands, and tell the host to discard the buffer contents on
          * the next DMA operation.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            /*
             * Instead of flushing the context command buffer, simply discard
             * the current hwbuf, and start a new one.
             */

            svga_buffer_destroy_hw_storage(ss, sbuf);
         }

         sbuf->map.num_ranges = 0;
         sbuf->dma.flags.discard = TRUE;
      }

      if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         if (!sbuf->map.num_ranges) {
            /*
             * No pending ranges to upload so far, so we can tell the host
             * to not synchronize on the next DMA command.
             */

            sbuf->dma.flags.unsynchronized = TRUE;
         }
      } else {
         /*
          * Synchronizing, so flush any pending primitives, finish writing
          * any pending DMA command, and ensure the next DMA will be done in
          * order.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            if (sbuf->hwbuf) {
               /*
                * We have a pending DMA upload from a hardware buffer,
                * therefore we need to ensure that the host finishes
                * processing that DMA command before the state tracker can
                * start overwriting the hardware buffer.
                *
                * XXX: This could be avoided by tying the hardware buffer to
                * the transfer (just as done with textures), which would
                * allow overlapping DMA commands to be queued on the same
                * context buffer. However, due to the likelihood of software
                * vertex processing, it is more convenient to hold on to the
                * hardware buffer, allowing the contents to be accessed
                * quickly from the CPU without having to do a DMA download
                * from the host.
                */

               if (usage & PIPE_TRANSFER_DONTBLOCK) {
                  /*
                   * Flushing the command buffer here will most likely cause
                   * the map of the hwbuf below to block, so preemptively
                   * return NULL here if DONTBLOCK is set to prevent
                   * unnecessary command buffer flushes.
                   */

                  FREE(transfer);
                  return NULL;
               }

               svga_context_flush(svga, NULL);
            }
         }

         sbuf->dma.flags.unsynchronized = FALSE;
      }
   }

   if (!sbuf->swbuf && !sbuf->hwbuf) {
      if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
          * buffer instead.
          */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
                         "splitting DMA transfers\n",
                         __FUNCTION__,
                         (sbuf->b.b.width0 + 1023)/1024);
         }

         sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
         if (!sbuf->swbuf) {
            FREE(transfer);
            return NULL;
         }
      }
   }

   return transfer;
}
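/*
 * For reference, a minimal sketch of how a state tracker of this era drives
 * the two-step transfer interface that svga_buffer_get_transfer() implements
 * (assuming the old pipe_context get_transfer/transfer_map hooks and the
 * u_box_1d() helper from util/u_box.h; offset, size and data are
 * hypothetical variables; error handling elided; illustrative only):
 */
#if 0
struct pipe_box box;
struct pipe_transfer *transfer;
void *map;

u_box_1d(offset, size, &box);
transfer = pipe->get_transfer(pipe, resource, 0, PIPE_TRANSFER_WRITE, &box);
map = pipe->transfer_map(pipe, transfer);
memcpy(map, data, size);
pipe->transfer_unmap(pipe, transfer);
pipe->transfer_destroy(pipe, transfer);
#endif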
/**
 * Create a buffer transfer.
 *
 * Unlike texture DMAs (which are written immediately to the command buffer
 * and therefore inherently serialized with other context operations), for
 * buffers we try to coalesce multiple range mappings (i.e., multiple calls
 * to this function) into a single DMA command, for better efficiency in
 * command processing. This means we need to exercise extra care here to
 * ensure that the end result is exactly the same as if one DMA was used for
 * every mapped range.
 */
static void *
svga_buffer_transfer_map(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level,
                         unsigned usage,
                         const struct pipe_box *box,
                         struct pipe_transfer **ptransfer)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_buffer *sbuf = svga_buffer(resource);
   struct pipe_transfer *transfer;
   uint8_t *map = NULL;
   int64_t begin = svga_get_time(svga);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERMAP);

   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);

   transfer = MALLOC_STRUCT(pipe_transfer);
   if (!transfer) {
      goto done;
   }

   transfer->resource = resource;
   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;
   transfer->stride = 0;
   transfer->layer_stride = 0;

   if (usage & PIPE_TRANSFER_WRITE) {
      /* If we write to the buffer for any reason, free any saved translated
       * vertices.
       */
      pipe_resource_reference(&sbuf->translated_indices.buffer, NULL);
   }

   if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) {
      enum pipe_error ret;

      /* Host-side buffers can only be dirtied with vgpu10 features
       * (streamout and buffer copy).
       */
      assert(svga_have_vgpu10(svga));

      if (!sbuf->user) {
         (void) svga_buffer_handle(svga, resource, sbuf->bind_flags);
      }

      if (sbuf->dma.pending) {
         svga_buffer_upload_flush(svga, sbuf);
         svga_context_finish(svga);
      }

      assert(sbuf->handle);

      ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
      if (ret != PIPE_OK) {
         svga_context_flush(svga, NULL);
         ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0);
         assert(ret == PIPE_OK);
      }

      svga->hud.num_readbacks++;

      svga_context_finish(svga);

      sbuf->dirty = FALSE;
   }

   if (usage & PIPE_TRANSFER_WRITE) {
      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /*
          * Flush any pending primitives, finish writing any pending DMA
          * commands, and tell the host to discard the buffer contents on
          * the next DMA operation.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            /*
             * Instead of flushing the context command buffer, simply discard
             * the current hwbuf, and start a new one.
             * With GB objects, the map operation takes care of this
             * if passed the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag,
             * and the old backing store is busy.
             */

            if (!svga_have_gb_objects(svga))
               svga_buffer_destroy_hw_storage(ss, sbuf);
         }

         sbuf->map.num_ranges = 0;
         sbuf->dma.flags.discard = TRUE;
      }

      if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         if (!sbuf->map.num_ranges) {
            /*
             * No pending ranges to upload so far, so we can tell the host
             * to not synchronize on the next DMA command.
             */

            sbuf->dma.flags.unsynchronized = TRUE;
         }
      } else {
         /*
          * Synchronizing, so flush any pending primitives, finish writing
          * any pending DMA command, and ensure the next DMA will be done in
          * order.
          */

         svga_hwtnl_flush_buffer(svga, resource);

         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

            if (svga_buffer_has_hw_storage(sbuf)) {
               /*
                * We have a pending DMA upload from a hardware buffer,
                * therefore we need to ensure that the host finishes
                * processing that DMA command before the state tracker can
                * start overwriting the hardware buffer.
                *
                * XXX: This could be avoided by tying the hardware buffer to
                * the transfer (just as done with textures), which would
                * allow overlapping DMA commands to be queued on the same
                * context buffer. However, due to the likelihood of software
                * vertex processing, it is more convenient to hold on to the
                * hardware buffer, allowing the contents to be accessed
                * quickly from the CPU without having to do a DMA download
                * from the host.
                */

               if (usage & PIPE_TRANSFER_DONTBLOCK) {
                  /*
                   * Flushing the command buffer here will most likely cause
                   * the map of the hwbuf below to block, so preemptively
                   * return NULL here if DONTBLOCK is set to prevent
                   * unnecessary command buffer flushes.
                   */

                  FREE(transfer);
                  goto done;
               }

               svga_context_flush(svga, NULL);
            }
         }

         sbuf->dma.flags.unsynchronized = FALSE;
      }
   }

   if (!sbuf->swbuf && !svga_buffer_has_hw_storage(sbuf)) {
      if (svga_buffer_create_hw_storage(ss, sbuf, sbuf->bind_flags) !=
          PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
          * buffer instead.
          */
         if (0) {
            debug_printf("%s: failed to allocate %u KB of DMA, "
                         "splitting DMA transfers\n",
                         __FUNCTION__,
                         (sbuf->b.b.width0 + 1023)/1024);
         }

         sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16);
         if (!sbuf->swbuf) {
            FREE(transfer);
            goto done;
         }
      }
   }

   if (sbuf->swbuf) {
      /* User/malloc buffer */
      map = sbuf->swbuf;
   }
   else if (svga_buffer_has_hw_storage(sbuf)) {
      boolean retry;

      map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      if (map == NULL && retry) {
         /*
          * At this point, svga_buffer_get_transfer() has already
          * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL
          * for this buffer.
          */
         svga_context_flush(svga, NULL);
         map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
      }
   }
   else {
      map = NULL;
   }

   if (map) {
      ++sbuf->map.count;
      map += transfer->box.x;
      *ptransfer = transfer;
   } else {
      FREE(transfer);
   }

   svga->hud.map_buffer_time += (svga_get_time(svga) - begin);

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return map;
}
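/*
 * With the newer unified interface above, mapping is a single call through
 * pipe_context::transfer_map.  A minimal usage sketch (offset, size and data
 * are hypothetical variables; u_box_1d() is from util/u_box.h; illustrative
 * only):
 */
#if 0
struct pipe_transfer *transfer = NULL;
struct pipe_box box;
void *map;

u_box_1d(offset, size, &box);
map = pipe->transfer_map(pipe, resource, 0, PIPE_TRANSFER_WRITE,
                         &box, &transfer);
if (map) {
   memcpy(map, data, size);   /* write within the mapped range */
   pipe->transfer_unmap(pipe, transfer);
}
#endif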