static void swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt) { struct swr_screen *screen = swr_screen(p_screen); struct swr_resource *spr = swr_resource(pt); struct pipe_context *pipe = screen->pipe; if (spr->display_target) { /* If resource is display target, winsys manages the buffer and will * free it on displaytarget_destroy. */ swr_fence_finish(p_screen, NULL, screen->flush_fence, 0); struct sw_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, spr->display_target); } else { /* For regular resources, if the resource is being used, defer deletion * (use aligned-free) */ if (pipe && spr->status) { swr_resource_unused(pt); swr_fence_work_free(screen->flush_fence, spr->swr.pBaseAddress, true); swr_fence_work_free(screen->flush_fence, spr->secondary.pBaseAddress, true); } else { AlignedFree(spr->swr.pBaseAddress); AlignedFree(spr->secondary.pBaseAddress); } } FREE(spr); }
static void swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt) { struct swr_screen *screen = swr_screen(p_screen); struct swr_resource *spr = swr_resource(pt); struct pipe_context *pipe = screen->pipe; /* Only wait on fence if the resource is being used */ if (pipe && spr->status) { /* But, if there's no fence pending, submit one. * XXX: Remove once draw timestamps are implmented. */ if (!swr_is_fence_pending(screen->flush_fence)) swr_fence_submit(swr_context(pipe), screen->flush_fence); swr_fence_finish(p_screen, screen->flush_fence, 0); swr_resource_unused(pt); } /* * Free resource primary surface. If resource is display target, winsys * manages the buffer and will free it on displaytarget_destroy. */ if (spr->display_target) { /* display target */ struct sw_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, spr->display_target); } else AlignedFree(spr->swr.pBaseAddress); AlignedFree(spr->secondary.pBaseAddress); FREE(spr); }
static void swr_resource_copy(struct pipe_context *pipe, struct pipe_resource *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, struct pipe_resource *src, unsigned src_level, const struct pipe_box *src_box) { struct swr_screen *screen = swr_screen(pipe->screen); /* If either the src or dst is a renderTarget, store tiles before copy */ swr_store_dirty_resource(pipe, src, SWR_TILE_RESOLVED); swr_store_dirty_resource(pipe, dst, SWR_TILE_RESOLVED); swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0); swr_resource_unused(src); swr_resource_unused(dst); if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) || (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) { util_resource_copy_region( pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); return; } debug_printf("unhandled swr_resource_copy\n"); }
/*
 * Flush the context and block until the resulting fence has finished,
 * then drop the local fence reference.
 */
void
swr_finish(struct pipe_context *pipe)
{
   struct pipe_fence_handle *flush_fence = nullptr;

   /* swr_flush fills in the fence to wait on */
   swr_flush(pipe, &flush_fence, 0);

   struct pipe_screen *p_screen = pipe->screen;
   swr_fence_finish(p_screen, NULL, flush_fence, 0);
   swr_fence_reference(p_screen, &flush_fence, NULL);
}
static void swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q) { struct swr_query *pq = swr_query(q); if (pq->fence) { if (!swr_is_fence_done(swr_fence(pq->fence))) { swr_fence_submit(swr_context(pipe), pq->fence); swr_fence_finish(pipe->screen, pq->fence, 0); } swr_fence_reference(pipe->screen, &pq->fence, NULL); } FREE(pq); }
static void swr_destroy_screen(struct pipe_screen *p_screen) { struct swr_screen *screen = swr_screen(p_screen); struct sw_winsys *winsys = screen->winsys; fprintf(stderr, "SWR destroy screen!\n"); swr_fence_finish(p_screen, screen->flush_fence, 0); swr_fence_reference(p_screen, &screen->flush_fence, NULL); JitDestroyContext(screen->hJitMgr); if (winsys->destroy) winsys->destroy(winsys); FREE(screen); }
static void swr_flush_frontbuffer(struct pipe_screen *p_screen, struct pipe_resource *resource, unsigned level, unsigned layer, void *context_private, struct pipe_box *sub_box) { struct swr_screen *screen = swr_screen(p_screen); struct sw_winsys *winsys = screen->winsys; struct swr_resource *spr = swr_resource(resource); struct pipe_context *pipe = screen->pipe; if (pipe) { swr_fence_finish(p_screen, screen->flush_fence, 0); swr_resource_unused(resource); SwrEndFrame(swr_context(pipe)->swrContext); } debug_assert(spr->display_target); if (spr->display_target) winsys->displaytarget_display( winsys, spr->display_target, context_private, sub_box); }
static void * swr_transfer_map(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box, struct pipe_transfer **transfer) { struct swr_screen *screen = swr_screen(pipe->screen); struct swr_resource *spr = swr_resource(resource); struct pipe_transfer *pt; enum pipe_format format = resource->format; assert(resource); assert(level <= resource->last_level); /* If mapping an attached rendertarget, store tiles to surface and set * postStoreTileState to SWR_TILE_INVALID so tiles get reloaded on next use * and nothing needs to be done at unmap. */ swr_store_dirty_resource(pipe, resource, SWR_TILE_INVALID); if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { /* If resource is in use, finish fence before mapping. * Unless requested not to block, then if not done return NULL map */ if (usage & PIPE_TRANSFER_DONTBLOCK) { if (swr_is_fence_pending(screen->flush_fence)) return NULL; } else { if (spr->status) { /* But, if there's no fence pending, submit one. * XXX: Remove once draw timestamps are finished. 
*/ if (!swr_is_fence_pending(screen->flush_fence)) swr_fence_submit(swr_context(pipe), screen->flush_fence); swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0); swr_resource_unused(resource); } } } pt = CALLOC_STRUCT(pipe_transfer); if (!pt) return NULL; pipe_resource_reference(&pt->resource, resource); pt->usage = (pipe_transfer_usage)usage; pt->level = level; pt->box = *box; pt->stride = spr->swr.pitch; pt->layer_stride = spr->swr.qpitch * spr->swr.pitch; /* if we're mapping the depth/stencil, copy in stencil for the section * being read in */ if (usage & PIPE_TRANSFER_READ && spr->has_depth && spr->has_stencil) { size_t zbase, sbase; for (int z = box->z; z < box->z + box->depth; z++) { zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch + spr->mip_offsets[level]; sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch + spr->secondary_mip_offsets[level]; for (int y = box->y; y < box->y + box->height; y++) { if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { for (int x = box->x; x < box->x + box->width; x++) ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3] = ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x]; } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { for (int x = box->x; x < box->x + box->width; x++) ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4] = ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x]; } zbase += spr->swr.pitch; sbase += spr->secondary.pitch; } } } unsigned offset = box->z * pt->layer_stride + util_format_get_nblocksy(format, box->y) * pt->stride + util_format_get_stride(format, box->x); *transfer = pt; return (void*)(spr->swr.xpBaseAddress + offset + spr->mip_offsets[level]); }
/*
 * Collect the current value for a query into *pq->result.
 *
 * Any fence attached to the query is waited on and released first.
 * Timestamp-style and GPU_FINISHED queries are answered directly and
 * return early; every other query type reads the SwrCore counters via
 * SwrGetStats followed by SwrWaitForIdle.  On the counter path, stat
 * collection is finally set to pq->enable_stats when the context has no
 * active queries.
 */
// XXX Create a fence callback, rather than stalling SwrWaitForIdle
static void
swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
{
   struct swr_context *ctx = swr_context(pipe);

   assert(pq->result);
   union pipe_query_result *result = pq->result;
   boolean enable_stats = pq->enable_stats;
   SWR_STATS swr_stats = {0};

   /* Make sure any fence attached to the query has finished, then drop it */
   if (pq->fence) {
      if (!swr_is_fence_done(swr_fence(pq->fence))) {
         swr_fence_submit(ctx, pq->fence);
         swr_fence_finish(pipe->screen, pq->fence, 0);
      }
      swr_fence_reference(pipe->screen, &pq->fence, NULL);
   }

   /*
    * These queries don't need SWR Stats enabled in the core
    * Set and return.
    */
   switch (pq->type) {
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      result->u64 = swr_get_timestamp(pipe->screen);
      return;
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* nothing to do here */
      return;
      break;
   case PIPE_QUERY_GPU_FINISHED:
      result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
                           vs LastRetiredId? */
      return;
      break;
   default:
      /* Any query that needs SwrCore stats */
      break;
   }

   /*
    * All other results are collected from SwrCore counters
    */

   /* XXX, Should turn this into a fence callback and skip the stall */
   SwrGetStats(ctx->swrContext, &swr_stats);
   /* SwrGetStats returns immediately, wait for collection */
   SwrWaitForIdle(ctx->swrContext);

   /* Translate the SWR counters into the gallium result layout */
   switch (pq->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_COUNTER:
      result->u64 = swr_stats.DepthPassCount;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      result->u64 = swr_stats.IaPrimitives;
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
      break;
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
      so_stats->num_primitives_written =
         swr_stats.SoNumPrimsWritten[pq->index];
      so_stats->primitives_storage_needed =
         swr_stats.SoPrimStorageNeeded[pq->index];
   } break;
   case PIPE_QUERY_PIPELINE_STATISTICS: {
      struct pipe_query_data_pipeline_statistics *p_stats =
         &result->pipeline_statistics;
      p_stats->ia_vertices = swr_stats.IaVertices;
      p_stats->ia_primitives = swr_stats.IaPrimitives;
      p_stats->vs_invocations = swr_stats.VsInvocations;
      p_stats->gs_invocations = swr_stats.GsInvocations;
      p_stats->gs_primitives = swr_stats.GsPrimitives;
      /* NOTE(review): c_invocations is filled from CPrimitives, the same
       * counter as c_primitives below - confirm whether a separate
       * clipper-invocation counter was intended here. */
      p_stats->c_invocations = swr_stats.CPrimitives;
      p_stats->c_primitives = swr_stats.CPrimitives;
      p_stats->ps_invocations = swr_stats.PsInvocations;
      p_stats->hs_invocations = swr_stats.HsInvocations;
      p_stats->ds_invocations = swr_stats.DsInvocations;
      p_stats->cs_invocations = swr_stats.CsInvocations;
   } break;
   default:
      assert(0 && "Unsupported query");
      break;
   }

   /* Only change stat collection if there are no active queries */
   if (ctx->active_queries == 0)
      SwrEnableStats(ctx->swrContext, enable_stats);
}
/*
 * Report the result of a query as the difference between its recorded end
 * and start snapshots.
 *
 * If the query still has an unfinished fence, the fence is submitted; with
 * wait == FALSE the function then returns FALSE without touching *result,
 * otherwise it blocks until the fence has finished.  Returns TRUE once
 * *result has been filled in.
 */
static boolean
swr_get_query_result(struct pipe_context *pipe,
                     struct pipe_query *q,
                     boolean wait,
                     union pipe_query_result *result)
{
   struct swr_context *ctx = swr_context(pipe);
   struct swr_query *pq = swr_query(q);

   if (pq->fence) {
      const bool fence_done = swr_is_fence_done(swr_fence(pq->fence));

      if (!fence_done) {
         swr_fence_submit(ctx, pq->fence);
         if (!wait) {
            return FALSE;
         }
         swr_fence_finish(pipe->screen, pq->fence, 0);
      }

      swr_fence_reference(pipe->screen, &pq->fence, NULL);
   }

   /* XXX: Need to handle counter rollover */

   switch (pq->type) {
   /* Booleans */
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      if (pq->end.u64 != pq->start.u64) {
         result->b = TRUE;
      } else {
         result->b = FALSE;
      }
      break;

   case PIPE_QUERY_GPU_FINISHED:
      result->b = pq->end.b;
      break;

   /* Counters */
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      result->u64 = pq->end.u64 - pq->start.u64;
      break;

   /* Structures */
   case PIPE_QUERY_SO_STATISTICS: {
      const struct pipe_query_data_so_statistics &first =
         pq->start.so_statistics;
      const struct pipe_query_data_so_statistics &last =
         pq->end.so_statistics;
      struct pipe_query_data_so_statistics &out = result->so_statistics;

      out.num_primitives_written =
         last.num_primitives_written - first.num_primitives_written;
      out.primitives_storage_needed =
         last.primitives_storage_needed - first.primitives_storage_needed;
      break;
   }

   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* os_get_time_nano returns nanoseconds */
      result->timestamp_disjoint.frequency = UINT64_C(1000000000);
      result->timestamp_disjoint.disjoint = FALSE;
      break;

   case PIPE_QUERY_PIPELINE_STATISTICS: {
      const struct pipe_query_data_pipeline_statistics &first =
         pq->start.pipeline_statistics;
      const struct pipe_query_data_pipeline_statistics &last =
         pq->end.pipeline_statistics;
      struct pipe_query_data_pipeline_statistics &out =
         result->pipeline_statistics;

      /* Input assembly */
      out.ia_vertices = last.ia_vertices - first.ia_vertices;
      out.ia_primitives = last.ia_primitives - first.ia_primitives;

      /* Shader stage invocations */
      out.vs_invocations = last.vs_invocations - first.vs_invocations;
      out.hs_invocations = last.hs_invocations - first.hs_invocations;
      out.ds_invocations = last.ds_invocations - first.ds_invocations;
      out.gs_invocations = last.gs_invocations - first.gs_invocations;
      out.ps_invocations = last.ps_invocations - first.ps_invocations;
      out.cs_invocations = last.cs_invocations - first.cs_invocations;

      /* Geometry shader output and clipper */
      out.gs_primitives = last.gs_primitives - first.gs_primitives;
      out.c_invocations = last.c_invocations - first.c_invocations;
      out.c_primitives = last.c_primitives - first.c_primitives;
      break;
   }

   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
      const struct pipe_query_data_so_statistics &first =
         pq->start.so_statistics;
      const struct pipe_query_data_so_statistics &last =
         pq->end.so_statistics;

      const uint64_t written =
         last.num_primitives_written - first.num_primitives_written;
      const uint64_t needed =
         last.primitives_storage_needed - first.primitives_storage_needed;

      /* NOTE(review): the predicate is "written > needed"; confirm this is
       * the intended direction for an overflow test. */
      result->b = written > needed;
      break;
   }

   default:
      assert(0 && "Unsupported query");
      break;
   }

   return TRUE;
}
/*
 * Draw vertex arrays, with optional indexing, optional instancing.
 *
 * Steps, in order:
 *   1. early-outs (u_trim_pipe_prim returning false, failed render
 *      condition) and hand-off of indirect draws to util_draw_indirect;
 *   2. derived-state and draw-context update;
 *   3. stream-output shader: JIT-compiled once per primitive mode and
 *      cached in ctx->vs->soFunc[];
 *   4. fetch shader: looked up in velems->map by fetch key, JIT-compiled
 *      and inserted on a miss;
 *   5. frontend state (vertex size, provoking vertex, cut index);
 *   6. the draw call itself, indexed or non-indexed;
 *   7. for draws flagged SWR_LARGE_CLIENT_DRAW, a blocking wait on the
 *      flush fence.
 */
static void
swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct swr_context *ctx = swr_context(pipe);

   /* The vertex count is only trimmed for plain draws: not for
    * stream-output-sourced counts, indirect draws or primitive restart. */
   if (!info->count_from_stream_output && !info->indirect &&
       !info->primitive_restart &&
       !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
      return;

   if (!swr_check_render_cond(pipe))
      return;

   if (info->indirect) {
      util_draw_indirect(pipe, info);
      return;
   }

   /* If indexed draw, force vertex validation since index buffer comes
    * from draw info. */
   if (info->index_size)
      ctx->dirty |= SWR_NEW_VERTEX;

   /* Update derived state, pass draw info to update function. */
   swr_update_derived(pipe, info);

   swr_update_draw_context(ctx);

   if (ctx->vs->pipe.stream_output.num_outputs) {
      /* Compile the stream-output function lazily, once per prim mode */
      if (!ctx->vs->soFunc[info->mode]) {
         STREAMOUT_COMPILE_STATE state = {0};
         struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;

         state.numVertsPerPrim = u_vertices_per_prim(info->mode);

         /* Running write offset per output buffer, used to detect gaps */
         uint32_t offsets[MAX_SO_STREAMS] = {0};
         uint32_t num = 0;

         for (uint32_t i = 0; i < so->num_outputs; i++) {
            assert(so->output[i].stream == 0); /* @todo */
            uint32_t output_buffer = so->output[i].output_buffer;
            if (so->output[i].dst_offset != offsets[output_buffer]) {
               /* hole - need to fill: emit a hole decl whose component
                * mask has one bit per skipped component */
               state.stream.decl[num].bufferIndex = output_buffer;
               state.stream.decl[num].hole = true;
               state.stream.decl[num].componentMask =
                  (1 << (so->output[i].dst_offset - offsets[output_buffer])) -
                  1;
               num++;
               offsets[output_buffer] = so->output[i].dst_offset;
            }

            unsigned attrib_slot = so->output[i].register_index;
            attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);

            /* Real output decl: num_components bits starting at
             * start_component */
            state.stream.decl[num].bufferIndex = output_buffer;
            state.stream.decl[num].attribSlot = attrib_slot;
            state.stream.decl[num].componentMask =
               ((1 << so->output[i].num_components) - 1)
               << so->output[i].start_component;
            state.stream.decl[num].hole = false;
            num++;

            offsets[output_buffer] += so->output[i].num_components;
         }

         state.stream.numDecls = num;

         HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
         ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
         debug_printf("so shader    %p\n", ctx->vs->soFunc[info->mode]);
         assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
      }

      ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
   }

   /* Per-draw fetch state: cut index and partial-vertex-buffer flag */
   struct swr_vertex_element_state *velems = ctx->velems;
   if (info->primitive_restart)
      velems->fsState.cutIndex = info->restart_index;
   else
      velems->fsState.cutIndex = 0;
   velems->fsState.bEnableCutIndex = info->primitive_restart;
   velems->fsState.bPartialVertexBuffer = (info->min_index > 0);

   /* Reuse a previously compiled fetch shader for this key if there is
    * one, otherwise compile and cache it */
   swr_jit_fetch_key key;
   swr_generate_fetch_key(key, velems);
   auto search = velems->map.find(key);
   if (search != velems->map.end()) {
      velems->fsFunc = search->second;
   } else {
      HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
      velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);

      debug_printf("fetch shader %p\n", velems->fsFunc);
      assert(velems->fsFunc && "Error: FetchShader = NULL");

      velems->map.insert(std::make_pair(key, velems->fsFunc));
   }

   ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);

   /* Set up frontend state
    * XXX setup provokingVertex & topologyProvokingVertex */
   SWR_FRONTEND_STATE feState = {0};

   // feState.vsVertexSize seeds the PA size that is used as an interface
   // between all the shader stages, so it has to be large enough to
   // incorporate all interfaces between stages

   // max of gs and vs num_outputs
   feState.vsVertexSize = ctx->vs->info.base.num_outputs;
   if (ctx->gs &&
       ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
      feState.vsVertexSize = ctx->gs->info.base.num_outputs;
   }

   if (ctx->vs->info.base.num_outputs) {
      // gs does not adjust for position in SGV slot at input from vs
      if (!ctx->gs)
         feState.vsVertexSize--;
   }

   // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
   feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;

   // The PA in the clipper does not handle BE vertex sizes
   // different from FE. Increase vertexsize only for the cases that needed it

   // primid needs a slot
   if (ctx->fs->info.base.uses_primid)
      feState.vsVertexSize++;
   // sprite coord enable
   if (ctx->rasterizer->sprite_coord_enable)
      feState.vsVertexSize++;

   /* Provoking vertex indices differ between first- and last-vertex
    * conventions */
   if (ctx->rasterizer->flatshade_first) {
      feState.provokingVertex = {1, 0, 0};
   } else {
      feState.provokingVertex = {2, 1, 2};
   }

   /* With a geometry shader, the topology is the GS output primitive */
   enum pipe_prim_type topology;
   if (ctx->gs)
      topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
   else
      topology = info->mode;

   switch (topology) {
   case PIPE_PRIM_TRIANGLE_FAN:
      feState.topologyProvokingVertex = feState.provokingVertex.triFan;
      break;
   case PIPE_PRIM_TRIANGLE_STRIP:
   case PIPE_PRIM_TRIANGLES:
      feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
      break;
   case PIPE_PRIM_QUAD_STRIP:
   case PIPE_PRIM_QUADS:
      if (ctx->rasterizer->flatshade_first)
         feState.topologyProvokingVertex = 0;
      else
         feState.topologyProvokingVertex = 3;
      break;
   case PIPE_PRIM_LINES:
   case PIPE_PRIM_LINE_LOOP:
   case PIPE_PRIM_LINE_STRIP:
      feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
      break;
   default:
      feState.topologyProvokingVertex = 0;
   }

   feState.bEnableCutIndex = info->primitive_restart;
   ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);

   /* Issue the draw; a non-zero index_size selects the indexed path */
   if (info->index_size)
      ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
                                          swr_convert_prim_topology(info->mode),
                                          info->count,
                                          info->instance_count,
                                          info->start,
                                          info->index_bias,
                                          info->start_instance);
   else
      ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
                                   swr_convert_prim_topology(info->mode),
                                   info->count,
                                   info->instance_count,
                                   info->start,
                                   info->start_instance);

   /* On large client-buffer draw, we used client buffer directly, without
    * copy.  Block until draw is finished.
    * VMD is an example application that benefits from this. */
   if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
      struct swr_screen *screen = swr_screen(pipe->screen);
      swr_fence_submit(ctx, screen->flush_fence);
      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
   }
}
static void * swr_transfer_map(struct pipe_context *pipe, struct pipe_resource *resource, unsigned level, unsigned usage, const struct pipe_box *box, struct pipe_transfer **transfer) { struct swr_screen *screen = swr_screen(pipe->screen); struct swr_resource *spr = swr_resource(resource); struct pipe_transfer *pt; enum pipe_format format = resource->format; assert(resource); assert(level <= resource->last_level); /* If mapping an attached rendertarget, store tiles to surface and set * postStoreTileState to SWR_TILE_INVALID so tiles get reloaded on next use * and nothing needs to be done at unmap. */ swr_store_dirty_resource(pipe, resource, SWR_TILE_INVALID); if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { /* If resource is in use, finish fence before mapping. * Unless requested not to block, then if not done return NULL map */ if (usage & PIPE_TRANSFER_DONTBLOCK) { if (swr_is_fence_pending(screen->flush_fence)) return NULL; } else { if (spr->status) { /* But, if there's no fence pending, submit one. * XXX: Remove once draw timestamps are finished. 
*/ if (!swr_is_fence_pending(screen->flush_fence)) swr_fence_submit(swr_context(pipe), screen->flush_fence); swr_fence_finish(pipe->screen, screen->flush_fence, 0); swr_resource_unused(resource); } } } pt = CALLOC_STRUCT(pipe_transfer); if (!pt) return NULL; pipe_resource_reference(&pt->resource, resource); pt->level = level; pt->box = *box; pt->stride = spr->row_stride[level]; pt->layer_stride = spr->img_stride[level]; /* if we're mapping the depth/stencil, copy in stencil */ if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT && spr->has_stencil) { for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i]; } } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && spr->has_stencil) { for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i]; } } unsigned offset = box->z * pt->layer_stride + box->y * pt->stride + box->x * util_format_get_blocksize(format); *transfer = pt; return spr->swr.pBaseAddress + offset + spr->mip_offsets[level]; }