void r600_upload_const_buffer(struct r600_context *rctx, struct si_resource **rbuffer,
                              const uint8_t *ptr, unsigned size,
                              uint32_t *const_offset)
{
    *rbuffer = NULL;

    if (R600_BIG_ENDIAN) {
        uint32_t *tmpPtr;
        unsigned i;

        if (!(tmpPtr = malloc(size))) {
            R600_ERR("Failed to allocate BE swap buffer.\n");
            return;
        }

        for (i = 0; i < size / 4; ++i) {
            tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]);
        }

        u_upload_data(rctx->uploader, 0, size, tmpPtr, const_offset,
                      (struct pipe_resource**)rbuffer);

        free(tmpPtr);
    } else {
        u_upload_data(rctx->uploader, 0, size, ptr, const_offset,
                      (struct pipe_resource**)rbuffer);
    }
}
static void
finalize_cbuf_state(struct ilo_context *ilo,
                    struct ilo_cbuf_state *cbuf,
                    const struct ilo_shader_state *sh)
{
   uint32_t upload_mask = cbuf->enabled_mask;

   /* skip CBUF0 if the kernel does not need it */
   upload_mask &=
      ~ilo_shader_get_kernel_param(sh, ILO_KERNEL_SKIP_CBUF0_UPLOAD);

   while (upload_mask) {
      const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
      unsigned offset, i;

      i = u_bit_scan(&upload_mask);
      /* no need to upload */
      if (cbuf->cso[i].resource)
         continue;

      u_upload_data(ilo->uploader, 0, cbuf->cso[i].user_buffer_size,
            cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);

      ilo_gpe_init_view_surface_for_buffer(ilo->dev,
            ilo_buffer(cbuf->cso[i].resource),
            offset, cbuf->cso[i].user_buffer_size,
            util_format_get_blocksize(elem_format), elem_format,
            false, false, &cbuf->cso[i].surface);

      ilo->state_vector.dirty |= ILO_DIRTY_CBUF;
   }
}
static void
setup_index_buffer(struct st_context *st,
                   const struct _mesa_index_buffer *ib,
                   struct pipe_index_buffer *ibuffer)
{
   struct gl_buffer_object *bufobj = ib->obj;

   ibuffer->index_size = vbo_sizeof_ib_type(ib->type);

   /* get/create the index buffer object */
   if (_mesa_is_bufferobj(bufobj)) {
      /* indices are in a real VBO */
      ibuffer->buffer = st_buffer_object(bufobj)->buffer;
      ibuffer->offset = pointer_to_offset(ib->ptr);
   }
   else if (st->indexbuf_uploader) {
      u_upload_data(st->indexbuf_uploader, 0,
                    ib->count * ibuffer->index_size, ib->ptr,
                    &ibuffer->offset, &ibuffer->buffer);
      u_upload_unmap(st->indexbuf_uploader);
   }
   else {
      /* indices are in user space memory */
      ibuffer->user_buffer = ib->ptr;
   }

   cso_set_index_buffer(st->cso_context, ibuffer);
}
/* As above, but upload the full contents of a buffer.  Useful for
 * uploading user buffers, avoids generating an explosion of GPU
 * buffers if you have an app that does lots of small vertex buffer
 * renders or DrawElements calls.
 */
enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
                                 unsigned offset,
                                 unsigned size,
                                 struct pipe_buffer *inbuf,
                                 unsigned *out_offset,
                                 struct pipe_buffer **outbuf )
{
   enum pipe_error ret = PIPE_OK;
   const char *map = NULL;

   map = (const char *)pipe_buffer_map( upload->screen,
                                        inbuf,
                                        PIPE_BUFFER_USAGE_CPU_READ );

   if (map == NULL) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto done;
   }

   if (0)
      debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);

   ret = u_upload_data( upload,
                        size,
                        map + offset,
                        out_offset,
                        outbuf );
   if (ret)
      goto done;

done:
   if (map)
      pipe_buffer_unmap( upload->screen, inbuf );

   return ret;
}
/**
 * Basically, translate Mesa's index buffer information into
 * a pipe_index_buffer object.
 * \return TRUE or FALSE for success/failure
 */
static boolean
setup_index_buffer(struct st_context *st,
                   const struct _mesa_index_buffer *ib,
                   struct pipe_index_buffer *ibuffer)
{
   struct gl_buffer_object *bufobj = ib->obj;

   ibuffer->index_size = vbo_sizeof_ib_type(ib->type);

   /* get/create the index buffer object */
   if (_mesa_is_bufferobj(bufobj)) {
      /* indices are in a real VBO */
      ibuffer->buffer = st_buffer_object(bufobj)->buffer;
      ibuffer->offset = pointer_to_offset(ib->ptr);
   }
   else if (st->indexbuf_uploader) {
      /* upload indexes from user memory into a real buffer */
      if (u_upload_data(st->indexbuf_uploader, 0,
                        ib->count * ibuffer->index_size, ib->ptr,
                        &ibuffer->offset, &ibuffer->buffer) != PIPE_OK) {
         /* out of memory */
         return FALSE;
      }
      u_upload_unmap(st->indexbuf_uploader);
   }
   else {
      /* indices are in user space memory */
      ibuffer->user_buffer = ib->ptr;
   }

   cso_set_index_buffer(st->cso_context, ibuffer);
   return TRUE;
}
/**
 * Pass the given program parameters to the graphics pipe as a
 * constant buffer.
 * \param shader_type either PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT
 */
void st_upload_constants( struct st_context *st,
                          struct gl_program_parameter_list *params,
                          unsigned shader_type)
{
   assert(shader_type == PIPE_SHADER_VERTEX ||
          shader_type == PIPE_SHADER_FRAGMENT ||
          shader_type == PIPE_SHADER_GEOMETRY ||
          shader_type == PIPE_SHADER_TESS_CTRL ||
          shader_type == PIPE_SHADER_TESS_EVAL);

   /* update constants */
   if (params && params->NumParameters) {
      struct pipe_constant_buffer cb;
      const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4;

      /* Update the constants which come from fixed-function state, such as
       * transformation matrices, fog factors, etc.  The rest of the values in
       * the parameters list are explicitly set by the user with glUniform,
       * glProgramParameter(), etc.
       */
      if (params->StateFlags)
         _mesa_load_state_parameters(st->ctx, params);

      /* We always need to get a new buffer, to keep the drivers simple and
       * avoid gratuitous rendering synchronization.
       * Let's use a user buffer to avoid an unnecessary copy.
       */
      if (st->constbuf_uploader) {
         cb.buffer = NULL;
         cb.user_buffer = NULL;
         u_upload_data(st->constbuf_uploader, 0, paramBytes,
                       params->ParameterValues, &cb.buffer_offset, &cb.buffer);
         u_upload_unmap(st->constbuf_uploader);
      } else {
         cb.buffer = NULL;
         cb.user_buffer = params->ParameterValues;
         cb.buffer_offset = 0;
      }
      cb.buffer_size = paramBytes;

      if (ST_DEBUG & DEBUG_CONSTANTS) {
         debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n",
                      __func__, shader_type, params->NumParameters,
                      params->StateFlags);
         _mesa_print_parameter_list(params);
      }

      cso_set_constant_buffer(st->cso_context, shader_type, 0, &cb);
      pipe_resource_reference(&cb.buffer, NULL);

      st->state.constants[shader_type].ptr = params->ParameterValues;
      st->state.constants[shader_type].size = paramBytes;
   }
   else if (st->state.constants[shader_type].ptr) {
      /* Unbind. */
      st->state.constants[shader_type].ptr = NULL;
      st->state.constants[shader_type].size = 0;
      cso_set_constant_buffer(st->cso_context, shader_type, 0, NULL);
   }
}
static void
finalize_index_buffer(struct ilo_context *ilo)
{
   struct ilo_state_vector *vec = &ilo->state_vector;
   const bool need_upload = (vec->draw->indexed &&
         (vec->ib.user_buffer || vec->ib.offset % vec->ib.index_size));
   struct pipe_resource *current_hw_res = NULL;

   if (!(vec->dirty & ILO_DIRTY_IB) && !need_upload)
      return;

   pipe_resource_reference(&current_hw_res, vec->ib.hw_resource);

   if (need_upload) {
      const unsigned offset = vec->ib.index_size * vec->draw->start;
      const unsigned size = vec->ib.index_size * vec->draw->count;
      unsigned hw_offset;

      if (vec->ib.user_buffer) {
         u_upload_data(ilo->uploader, 0, size,
               vec->ib.user_buffer + offset,
               &hw_offset, &vec->ib.hw_resource);
      } else {
         u_upload_buffer(ilo->uploader, 0, vec->ib.offset + offset, size,
               vec->ib.buffer, &hw_offset, &vec->ib.hw_resource);
      }

      /* the HW offset should be aligned */
      assert(hw_offset % vec->ib.index_size == 0);
      vec->ib.draw_start_offset = hw_offset / vec->ib.index_size;

      /*
       * INDEX[vec->draw->start] in the original buffer is INDEX[0] in the HW
       * resource
       */
      vec->ib.draw_start_offset -= vec->draw->start;
   } else {
      pipe_resource_reference(&vec->ib.hw_resource, vec->ib.buffer);

      /* note that index size may be zero when the draw is not indexed */
      if (vec->draw->indexed)
         vec->ib.draw_start_offset = vec->ib.offset / vec->ib.index_size;
      else
         vec->ib.draw_start_offset = 0;
   }

   /* treat the IB as clean if the HW states do not change */
   if (vec->ib.hw_resource == current_hw_res &&
       vec->ib.hw_index_size == vec->ib.index_size)
      vec->dirty &= ~ILO_DIRTY_IB;
   else
      vec->ib.hw_index_size = vec->ib.index_size;

   pipe_resource_reference(&current_hw_res, NULL);
}
static void
iris_update_grid_size_resource(struct iris_context *ice,
                               const struct pipe_grid_info *grid)
{
   const struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct isl_device *isl_dev = &screen->isl_dev;
   struct iris_state_ref *grid_ref = &ice->state.grid_size;
   struct iris_state_ref *state_ref = &ice->state.grid_surf_state;

   // XXX: if the shader doesn't actually care about the grid info,
   // don't bother uploading the surface?

   if (grid->indirect) {
      pipe_resource_reference(&grid_ref->res, grid->indirect);
      grid_ref->offset = grid->indirect_offset;

      /* Zero out the grid size so that the next non-indirect grid launch will
       * re-upload it properly.
       */
      memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
   } else {
      /* If the size is the same, we don't need to upload anything. */
      if (memcmp(ice->state.last_grid, grid->grid, sizeof(grid->grid)) == 0)
         return;

      memcpy(ice->state.last_grid, grid->grid, sizeof(grid->grid));

      u_upload_data(ice->state.dynamic_uploader, 0, sizeof(grid->grid), 4,
                    grid->grid, &grid_ref->offset, &grid_ref->res);
   }

   void *surf_map = NULL;
   u_upload_alloc(ice->state.surface_uploader, 0, isl_dev->ss.size,
                  isl_dev->ss.align, &state_ref->offset, &state_ref->res,
                  &surf_map);
   state_ref->offset +=
      iris_bo_offset_from_base_address(iris_resource_bo(state_ref->res));

   isl_buffer_fill_state(&screen->isl_dev, surf_map,
                         .address = grid_ref->offset +
                            iris_resource_bo(grid_ref->res)->gtt_offset,
                         .size_B = sizeof(grid->grid),
                         .format = ISL_FORMAT_RAW,
                         .stride_B = 1,
                         .mocs = 4); // XXX: MOCS

   ice->state.dirty |= IRIS_DIRTY_BINDINGS_CS;
}
static void r300_render_draw_elements(struct vbuf_render* render,
                                      const ushort* indices,
                                      uint count)
{
    struct r300_render* r300render = r300_render(render);
    struct r300_context* r300 = r300render->r300;
    unsigned max_index = (r300->vbo->size - r300->draw_vbo_offset) /
                         (r300render->r300->vertex_info.size * 4) - 1;
    struct pipe_resource *index_buffer = NULL;
    unsigned index_buffer_offset;

    CS_LOCALS(r300);
    DBG(r300, DBG_DRAW, "r300: render_draw_elements (count: %d)\n", count);

    u_upload_data(r300->uploader, 0, count * 2, indices,
                  &index_buffer_offset, &index_buffer);
    if (!index_buffer) {
        return;
    }

    if (!r300_prepare_for_rendering(r300,
                                    PREP_EMIT_STATES |
                                    PREP_EMIT_VARRAYS_SWTCL |
                                    PREP_INDEXED,
                                    index_buffer, 12, 0, 0, -1)) {
        pipe_resource_reference(&index_buffer, NULL);
        return;
    }

    BEGIN_CS(12);
    OUT_CS_REG(R300_GA_COLOR_CONTROL,
               r300_provoking_vertex_fixes(r300, r300render->prim));
    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index);

    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
           r300render->hwprim);

    OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2));
    OUT_CS(index_buffer_offset);
    OUT_CS((count + 1) / 2);
    OUT_CS_RELOC(r300_resource(index_buffer));
    END_CS;

    pipe_resource_reference(&index_buffer, NULL);
}
void r300_upload_index_buffer(struct r300_context *r300,
                              struct pipe_resource **index_buffer,
                              unsigned index_size, unsigned *start,
                              unsigned count, uint8_t *ptr)
{
    unsigned index_offset;

    *index_buffer = NULL;

    u_upload_data(r300->uploader,
                  0, count * index_size,
                  ptr + (*start * index_size),
                  &index_offset,
                  index_buffer);

    /* Rebase the draw: the uploaded range begins at index_offset, so the
     * start index is now relative to the upload buffer. */
    *start = index_offset / index_size;
}
/* As above, but upload the full contents of a buffer.  Useful for
 * uploading user buffers, avoids generating an explosion of GPU
 * buffers if you have an app that does lots of small vertex buffer
 * renders or DrawElements calls.
 */
enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
                                 unsigned min_out_offset,
                                 unsigned offset,
                                 unsigned size,
                                 struct pipe_resource *inbuf,
                                 unsigned *out_offset,
                                 struct pipe_resource **outbuf,
                                 boolean *flushed )
{
   enum pipe_error ret = PIPE_OK;
   struct pipe_transfer *transfer = NULL;
   const char *map = NULL;

   map = (const char *)pipe_buffer_map_range(upload->pipe,
                                             inbuf,
                                             offset, size,
                                             PIPE_TRANSFER_READ,
                                             &transfer);

   if (map == NULL) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto done;
   }

   if (0)
      debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);

   ret = u_upload_data( upload,
                        min_out_offset,
                        size,
                        map + offset,
                        out_offset,
                        outbuf, flushed );

done:
   if (map)
      pipe_buffer_unmap( upload->pipe, transfer );

   return ret;
}
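/* A hedged usage sketch (not from the Mesa sources): how a caller might use
 * the u_upload_buffer() variant above to copy a sub-range of an existing
 * resource into the shared upload buffer. The wrapper name and parameters
 * are hypothetical; only the u_upload_buffer() signature is taken from the
 * snippet above.
 */
static enum pipe_error
example_upload_subrange(struct u_upload_mgr *uploader,
                        struct pipe_resource *src,
                        unsigned src_offset, unsigned size,
                        unsigned *out_offset, struct pipe_resource **outbuf)
{
   boolean flushed;

   /* Copy `size` bytes starting at `src_offset` of `src`; on success,
    * *outbuf holds the upload buffer and *out_offset the byte offset of
    * the copied data within it. */
   return u_upload_buffer(uploader, 0, src_offset, size, src,
                          out_offset, outbuf, &flushed);
}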
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        if (info->mode >= PIPE_PRIM_QUADS) {
                util_primconvert_save_index_buffer(vc4->primconvert,
                                                   &vc4->indexbuf);
                util_primconvert_save_rasterizer_state(vc4->primconvert,
                                                       &vc4->rasterizer->base);
                util_primconvert_draw_vbo(vc4->primconvert, info);
                perf_debug("Fallback conversion for %d %s vertices\n",
                           info->count, u_prim_name(info->mode));
                return;
        }

        /* Before setting up the draw, do any fixup blits necessary. */
        vc4_update_shadow_textures(pctx, &vc4->verttex);
        vc4_update_shadow_textures(pctx, &vc4->fragtex);

        vc4_hw_2116_workaround(pctx);

        vc4_get_draw_cl_space(vc4);

        if (vc4->prim_mode != info->mode) {
                vc4->prim_mode = info->mode;
                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
        }

        vc4_start_draw(vc4);
        vc4_update_compiled_shaders(vc4, info->mode);

        vc4_emit_state(pctx);

        if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
                           VC4_DIRTY_VTXSTATE |
                           VC4_DIRTY_PRIM_MODE |
                           VC4_DIRTY_RASTERIZER |
                           VC4_DIRTY_COMPILED_CS |
                           VC4_DIRTY_COMPILED_VS |
                           VC4_DIRTY_COMPILED_FS |
                           vc4->prog.cs->uniform_dirty_bits |
                           vc4->prog.vs->uniform_dirty_bits |
                           vc4->prog.fs->uniform_dirty_bits)) ||
            vc4->last_index_bias != info->index_bias) {
                vc4_emit_gl_shader_state(vc4, info);
        }

        vc4->dirty = 0;

        /* Note that the primitive type fields match with OpenGL/gallium
         * definitions, up to but not including QUADS.
         */
        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
        if (info->indexed) {
                uint32_t offset = vc4->indexbuf.offset;
                uint32_t index_size = vc4->indexbuf.index_size;
                struct pipe_resource *prsc;
                if (vc4->indexbuf.index_size == 4) {
                        prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
                                                           info->count, &offset);
                        index_size = 2;
                } else {
                        if (vc4->indexbuf.user_buffer) {
                                prsc = NULL;
                                u_upload_data(vc4->uploader, 0,
                                              info->count * index_size, 4,
                                              vc4->indexbuf.user_buffer,
                                              &offset, &prsc);
                        } else {
                                prsc = vc4->indexbuf.buffer;
                        }
                }
                struct vc4_resource *rsc = vc4_resource(prsc);

                cl_start_reloc(&vc4->bcl, &bcl, 1);
                cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
                cl_u8(&bcl,
                      info->mode |
                      (index_size == 2 ?
                       VC4_INDEX_BUFFER_U16 :
                       VC4_INDEX_BUFFER_U8));
                cl_u32(&bcl, info->count);
                cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
                cl_u32(&bcl, vc4->max_index);

                if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
                        pipe_resource_reference(&prsc, NULL);
        } else {
                cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
                cl_u8(&bcl, info->mode);
                cl_u32(&bcl, info->count);
                cl_u32(&bcl, info->start);
        }
        cl_end(&vc4->bcl, bcl);

        if (vc4->zsa && vc4->zsa->base.depth.enabled) {
                vc4->resolve |= PIPE_CLEAR_DEPTH;
        }
        if (vc4->zsa && vc4->zsa->base.stencil[0].enabled)
                vc4->resolve |= PIPE_CLEAR_STENCIL;
        vc4->resolve |= PIPE_CLEAR_COLOR0;

        vc4->shader_rec_count++;

        if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
                vc4_flush(pctx);
}
void r600_upload_index_buffer(struct r600_context *rctx,
                              struct pipe_index_buffer *ib, unsigned count)
{
    u_upload_data(rctx->uploader, 0, count * ib->index_size,
                  ib->user_buffer, &ib->offset, &ib->buffer);
}
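/* A minimal sketch (not from the Mesa sources) of the pattern the helper
 * above implements: copy a user-space index array into a GPU-visible buffer
 * before an indexed draw. The wrapper name and parameters are hypothetical;
 * the six-argument u_upload_data() form matches the r600 snippet above
 * (other snippets in this file use later variants with an extra alignment
 * parameter).
 */
static void
example_upload_user_indices(struct u_upload_mgr *uploader,
                            const void *user_indices,
                            unsigned count, unsigned index_size,
                            unsigned *out_offset,
                            struct pipe_resource **out_buf)
{
   *out_buf = NULL;
   u_upload_data(uploader, 0, count * index_size, user_indices,
                 out_offset, out_buf);
   /* On success, *out_buf references the upload buffer and *out_offset is
    * where the indices start within it; the draw should use that offset. */
}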
static void u_vbuf_upload_buffers(struct u_vbuf_priv *mgr,
                                  int min_index, int max_index,
                                  unsigned instance_count)
{
   unsigned i;
   unsigned count = max_index + 1 - min_index;
   unsigned nr_velems = mgr->ve->count;
   unsigned nr_vbufs = mgr->b.nr_vertex_buffers;
   struct pipe_vertex_element *velems =
         mgr->fallback_ve ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS] = {0};

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[index];
      unsigned instance_div, first, size;

      /* Skip the buffer generated by translate. */
      if (index == mgr->fallback_vb_slot) {
         continue;
      }

      assert(vb->buffer);

      if (!u_vbuf_resource(vb->buffer)->user_ptr) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */
         unsigned count = (instance_count + instance_div - 1) / instance_div;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * min_index;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      }

      /* Update offsets. */
      if (!end_offset[index]) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }
   }

   /* Upload buffers. */
   for (i = 0; i < nr_vbufs; i++) {
      unsigned start, end = end_offset[i];
      boolean flushed;
      struct pipe_vertex_buffer *real_vb;
      uint8_t *ptr;

      if (!end) {
         continue;
      }

      start = start_offset[i];
      assert(start < end);

      real_vb = &mgr->b.real_vertex_buffer[i];
      ptr = u_vbuf_resource(mgr->b.vertex_buffer[i].buffer)->user_ptr;

      u_upload_data(mgr->b.uploader, start, end - start, ptr + start,
                    &real_vb->buffer_offset, &real_vb->buffer, &flushed);

      real_vb->buffer_offset -= start;
   }
}
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        if (info->mode >= PIPE_PRIM_QUADS) {
                util_primconvert_save_index_buffer(vc4->primconvert,
                                                   &vc4->indexbuf);
                util_primconvert_save_rasterizer_state(vc4->primconvert,
                                                       &vc4->rasterizer->base);
                util_primconvert_draw_vbo(vc4->primconvert, info);
                perf_debug("Fallback conversion for %d %s vertices\n",
                           info->count, u_prim_name(info->mode));
                return;
        }

        /* Before setting up the draw, do any fixup blits necessary. */
        vc4_predraw_check_textures(pctx, &vc4->verttex);
        vc4_predraw_check_textures(pctx, &vc4->fragtex);

        vc4_hw_2116_workaround(pctx, info->count);

        struct vc4_job *job = vc4_get_job_for_fbo(vc4);

        vc4_get_draw_cl_space(job, info->count);

        if (vc4->prim_mode != info->mode) {
                vc4->prim_mode = info->mode;
                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
        }

        vc4_start_draw(vc4);
        if (!vc4_update_compiled_shaders(vc4, info->mode)) {
                debug_warn_once("shader compile failed, skipping draw call.\n");
                return;
        }

        vc4_emit_state(pctx);

        if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
                           VC4_DIRTY_VTXSTATE |
                           VC4_DIRTY_PRIM_MODE |
                           VC4_DIRTY_RASTERIZER |
                           VC4_DIRTY_COMPILED_CS |
                           VC4_DIRTY_COMPILED_VS |
                           VC4_DIRTY_COMPILED_FS |
                           vc4->prog.cs->uniform_dirty_bits |
                           vc4->prog.vs->uniform_dirty_bits |
                           vc4->prog.fs->uniform_dirty_bits)) ||
            vc4->last_index_bias != info->index_bias) {
                vc4_emit_gl_shader_state(vc4, info, 0);
        }

        vc4->dirty = 0;

        /* Note that the primitive type fields match with OpenGL/gallium
         * definitions, up to but not including QUADS.
         */
        struct vc4_cl_out *bcl = cl_start(&job->bcl);
        if (info->indexed) {
                uint32_t offset = vc4->indexbuf.offset;
                uint32_t index_size = vc4->indexbuf.index_size;
                struct pipe_resource *prsc;
                if (vc4->indexbuf.index_size == 4) {
                        prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
                                                           info->count, &offset);
                        index_size = 2;
                } else {
                        if (vc4->indexbuf.user_buffer) {
                                prsc = NULL;
                                u_upload_data(vc4->uploader, 0,
                                              info->count * index_size, 4,
                                              vc4->indexbuf.user_buffer,
                                              &offset, &prsc);
                        } else {
                                prsc = vc4->indexbuf.buffer;
                        }
                }
                struct vc4_resource *rsc = vc4_resource(prsc);

                cl_start_reloc(&job->bcl, &bcl, 1);
                cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
                cl_u8(&bcl,
                      info->mode |
                      (index_size == 2 ?
                       VC4_INDEX_BUFFER_U16 :
                       VC4_INDEX_BUFFER_U8));
                cl_u32(&bcl, info->count);
                cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
                cl_u32(&bcl, vc4->max_index);
                job->draw_calls_queued++;

                if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
                        pipe_resource_reference(&prsc, NULL);
        } else {
                uint32_t count = info->count;
                uint32_t start = info->start;
                uint32_t extra_index_bias = 0;

                while (count) {
                        uint32_t this_count = count;
                        uint32_t step = count;
                        static const uint32_t max_verts = 65535;

                        /* GFXH-515 / SW-5891: The binner emits 16 bit indices
                         * for drawarrays, which means that if start + count >
                         * 64k it would truncate the top bits.  Work around
                         * this by emitting a limited number of primitives at
                         * a time and reemitting the shader state pointing
                         * farther down the vertex attribute arrays.
                         *
                         * To do this properly for line loops or trifans, we'd
                         * need to make a new VB containing the first vertex
                         * plus whatever remainder.
                         */
                        if (extra_index_bias) {
                                cl_end(&job->bcl, bcl);
                                vc4_emit_gl_shader_state(vc4, info,
                                                         extra_index_bias);
                                bcl = cl_start(&job->bcl);
                        }

                        if (start + count > max_verts) {
                                switch (info->mode) {
                                case PIPE_PRIM_POINTS:
                                        this_count = step = max_verts;
                                        break;
                                case PIPE_PRIM_LINES:
                                        this_count = step = max_verts - (max_verts % 2);
                                        break;
                                case PIPE_PRIM_LINE_STRIP:
                                        this_count = max_verts;
                                        step = max_verts - 1;
                                        break;
                                case PIPE_PRIM_LINE_LOOP:
                                        this_count = max_verts;
                                        step = max_verts - 1;
                                        debug_warn_once("unhandled line loop "
                                                        "looping behavior with "
                                                        ">65535 verts\n");
                                        break;
                                case PIPE_PRIM_TRIANGLES:
                                        this_count = step = max_verts - (max_verts % 3);
                                        break;
                                case PIPE_PRIM_TRIANGLE_STRIP:
                                        this_count = max_verts;
                                        step = max_verts - 2;
                                        break;
                                default:
                                        debug_warn_once("unhandled primitive "
                                                        "max vert count, truncating\n");
                                        this_count = step = max_verts;
                                }
                        }

                        cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
                        cl_u8(&bcl, info->mode);
                        cl_u32(&bcl, this_count);
                        cl_u32(&bcl, start);
                        job->draw_calls_queued++;

                        count -= step;
                        extra_index_bias += start + step;
                        start = 0;
                }
        }
        cl_end(&job->bcl, bcl);

        /* We shouldn't have tripped the HW_2116 bug with the GFXH-515
         * workaround.
         */
        assert(job->draw_calls_queued <= VC4_HW_2116_COUNT);

        if (vc4->zsa && vc4->framebuffer.zsbuf) {
                struct vc4_resource *rsc =
                        vc4_resource(vc4->framebuffer.zsbuf->texture);

                if (vc4->zsa->base.depth.enabled) {
                        job->resolve |= PIPE_CLEAR_DEPTH;
                        rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
                }

                if (vc4->zsa->base.stencil[0].enabled) {
                        job->resolve |= PIPE_CLEAR_STENCIL;
                        rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
                }
        }

        job->resolve |= PIPE_CLEAR_COLOR0;

        if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
                vc4_flush(pctx);
}
/**
 * Pass the given program parameters to the graphics pipe as a
 * constant buffer.
 * \param stage  the shader stage (vertex through compute)
 */
void st_upload_constants( struct st_context *st,
                          struct gl_program_parameter_list *params,
                          gl_shader_stage stage)
{
   enum pipe_shader_type shader_type = st_shader_stage_to_ptarget(stage);

   assert(shader_type == PIPE_SHADER_VERTEX ||
          shader_type == PIPE_SHADER_FRAGMENT ||
          shader_type == PIPE_SHADER_GEOMETRY ||
          shader_type == PIPE_SHADER_TESS_CTRL ||
          shader_type == PIPE_SHADER_TESS_EVAL ||
          shader_type == PIPE_SHADER_COMPUTE);

   /* update the ATI constants before rendering */
   if (shader_type == PIPE_SHADER_FRAGMENT && st->fp->ati_fs) {
      struct ati_fragment_shader *ati_fs = st->fp->ati_fs;
      unsigned c;

      for (c = 0; c < MAX_NUM_FRAGMENT_CONSTANTS_ATI; c++) {
         if (ati_fs->LocalConstDef & (1 << c))
            memcpy(params->ParameterValues[c],
                   ati_fs->Constants[c], sizeof(GLfloat) * 4);
         else
            memcpy(params->ParameterValues[c],
                   st->ctx->ATIFragmentShader.GlobalConstants[c],
                   sizeof(GLfloat) * 4);
      }
   }

   /* update constants */
   if (params && params->NumParameters) {
      struct pipe_constant_buffer cb;
      const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4;

      /* Update the constants which come from fixed-function state, such as
       * transformation matrices, fog factors, etc.  The rest of the values in
       * the parameters list are explicitly set by the user with glUniform,
       * glProgramParameter(), etc.
       */
      if (params->StateFlags)
         _mesa_load_state_parameters(st->ctx, params);

      _mesa_shader_write_subroutine_indices(st->ctx, stage);

      /* We always need to get a new buffer, to keep the drivers simple and
       * avoid gratuitous rendering synchronization.
       * Let's use a user buffer to avoid an unnecessary copy.
       */
      if (st->constbuf_uploader) {
         cb.buffer = NULL;
         cb.user_buffer = NULL;
         u_upload_data(st->constbuf_uploader, 0, paramBytes,
                       st->ctx->Const.UniformBufferOffsetAlignment,
                       params->ParameterValues, &cb.buffer_offset, &cb.buffer);
         u_upload_unmap(st->constbuf_uploader);
      } else {
         cb.buffer = NULL;
         cb.user_buffer = params->ParameterValues;
         cb.buffer_offset = 0;
      }
      cb.buffer_size = paramBytes;

      if (ST_DEBUG & DEBUG_CONSTANTS) {
         debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n",
                      __func__, shader_type, params->NumParameters,
                      params->StateFlags);
         _mesa_print_parameter_list(params);
      }

      cso_set_constant_buffer(st->cso_context, shader_type, 0, &cb);
      pipe_resource_reference(&cb.buffer, NULL);

      st->state.constants[shader_type].ptr = params->ParameterValues;
      st->state.constants[shader_type].size = paramBytes;
   }
   else if (st->state.constants[shader_type].ptr) {
      /* Unbind. */
      st->state.constants[shader_type].ptr = NULL;
      st->state.constants[shader_type].size = 0;
      cso_set_constant_buffer(st->cso_context, shader_type, 0, NULL);
   }
}
static void u_vbuf_upload_buffers(struct u_vbuf *mgr,
                                  int start_vertex, unsigned num_vertices,
                                  int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   unsigned nr_vbufs = mgr->nr_vertex_buffers;
   struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS] = {0};

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      if (!vb->user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */
         unsigned count = (num_instances + instance_div - 1) / instance_div;
         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      /* Update offsets. */
      if (!end_offset[index]) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }
   }

   /* Upload buffers. */
   for (i = 0; i < nr_vbufs; i++) {
      unsigned start, end = end_offset[i];
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      if (!end) {
         continue;
      }

      start = start_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].user_buffer;

      u_upload_data(mgr->uploader, start, end - start, ptr + start,
                    &real_vb->buffer_offset, &real_vb->buffer);

      real_vb->buffer_offset -= start;
   }
}
void
st_setup_current(struct st_context *st,
                 const struct st_vertex_program *vp,
                 const struct st_vp_variant *vp_variant,
                 struct pipe_vertex_element *velements,
                 struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   struct gl_context *ctx = st->ctx;
   const GLbitfield inputs_read = vp_variant->vert_attrib_mask;

   /* Process values that should have better been uniforms in the application */
   GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
   if (curmask) {
      /* vertex program validation must be done before this */
      const struct st_vertex_program *vp = st->vp;
      const ubyte *input_to_index = vp->input_to_index;
      /* For each attribute, upload the maximum possible size. */
      GLubyte data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4];
      GLubyte *cursor = data;
      const unsigned bufidx = (*num_vbuffers)++;
      unsigned max_alignment = 1;

      while (curmask) {
         const gl_vert_attrib attr = u_bit_scan(&curmask);
         const struct gl_array_attributes *const attrib
            = _mesa_draw_current_attrib(ctx, attr);
         const unsigned size = attrib->Format._ElementSize;
         const unsigned alignment = util_next_power_of_two(size);
         max_alignment = MAX2(max_alignment, alignment);
         memcpy(cursor, attrib->Ptr, size);
         if (alignment != size)
            memset(cursor + size, 0, alignment - size);

         init_velement_lowered(vp, velements, &attrib->Format, cursor - data,
                               0, bufidx, input_to_index[attr]);

         cursor += alignment;
      }

      vbuffer[bufidx].is_user_buffer = false;
      vbuffer[bufidx].buffer.resource = NULL;
      /* vbuffer[bufidx].buffer_offset is set below */
      vbuffer[bufidx].stride = 0;

      /* Use const_uploader for zero-stride vertex attributes, because
       * it may use a better memory placement than stream_uploader.
       * The reason is that zero-stride attributes can be fetched many
       * times (thousands of times), so a better placement is going to
       * perform better.
       */
      struct u_upload_mgr *uploader = st->can_bind_const_buffer_as_vertex ?
                                      st->pipe->const_uploader :
                                      st->pipe->stream_uploader;
      u_upload_data(uploader, 0, cursor - data, max_alignment, data,
                    &vbuffer[bufidx].buffer_offset,
                    &vbuffer[bufidx].buffer.resource);
      /* Always unmap. The uploader might use explicit flushes. */
      u_upload_unmap(uploader);
   }
}
static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS];
   uint32_t buffer_mask = 0;

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size, index_bit;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      if (!vb->user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */
         unsigned count = (num_instances + instance_div - 1) / instance_div;
         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      index_bit = 1 << index;

      /* Update offsets. */
      if (!(buffer_mask & index_bit)) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }

      buffer_mask |= index_bit;
   }

   /* Upload buffers. */
   while (buffer_mask) {
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      i = u_bit_scan(&buffer_mask);

      start = start_offset[i];
      end = end_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].user_buffer;

      u_upload_data(mgr->pipe->stream_uploader, start, end - start, 4,
                    ptr + start, &real_vb->buffer_offset, &real_vb->buffer);
      if (!real_vb->buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}
/* Setup all vertex pipeline state, rasterizer state, and fragment shader
 * constants, and issue the draw call for PBO upload/download.
 *
 * The caller is responsible for saving and restoring state, as well as for
 * setting other fragment shader state (fragment shader, samplers), and
 * framebuffer/viewport/DSA/blend state.
 */
bool
st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr,
            unsigned surface_width, unsigned surface_height)
{
   struct cso_context *cso = st->cso_context;

   /* Setup vertex and geometry shaders */
   if (!st->pbo.vs) {
      st->pbo.vs = st_pbo_create_vs(st);
      if (!st->pbo.vs)
         return false;
   }

   if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) {
      st->pbo.gs = st_pbo_create_gs(st);
      if (!st->pbo.gs)
         return false;
   }

   cso_set_vertex_shader_handle(cso, st->pbo.vs);
   cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL);
   cso_set_tessctrl_shader_handle(cso, NULL);
   cso_set_tesseval_shader_handle(cso, NULL);

   /* Upload vertices */
   {
      struct pipe_vertex_buffer vbo;
      struct pipe_vertex_element velem;

      float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f;
      float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f;
      float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f;
      float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f;

      float *verts = NULL;

      vbo.user_buffer = NULL;
      vbo.buffer = NULL;
      vbo.stride = 2 * sizeof(float);

      u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
                     &vbo.buffer_offset, &vbo.buffer, (void **) &verts);
      if (!verts)
         return false;

      verts[0] = x0;
      verts[1] = y0;
      verts[2] = x0;
      verts[3] = y1;
      verts[4] = x1;
      verts[5] = y0;
      verts[6] = x1;
      verts[7] = y1;

      u_upload_unmap(st->uploader);

      velem.src_offset = 0;
      velem.instance_divisor = 0;
      velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(cso);
      velem.src_format = PIPE_FORMAT_R32G32_FLOAT;

      cso_set_vertex_elements(cso, 1, &velem);
      cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo);

      pipe_resource_reference(&vbo.buffer, NULL);
   }

   /* Upload constants */
   {
      struct pipe_constant_buffer cb;

      if (st->constbuf_uploader) {
         cb.buffer = NULL;
         cb.user_buffer = NULL;
         u_upload_data(st->constbuf_uploader, 0, sizeof(addr->constants),
                       st->ctx->Const.UniformBufferOffsetAlignment,
                       &addr->constants, &cb.buffer_offset, &cb.buffer);
         if (!cb.buffer)
            return false;
         u_upload_unmap(st->constbuf_uploader);
      } else {
         cb.buffer = NULL;
         cb.user_buffer = &addr->constants;
         cb.buffer_offset = 0;
      }
      cb.buffer_size = sizeof(addr->constants);

      cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb);

      pipe_resource_reference(&cb.buffer, NULL);
   }

   /* Rasterizer state */
   cso_set_rasterizer(cso, &st->pbo.raster);

   /* Disable stream output */
   cso_set_stream_outputs(cso, 0, NULL, 0);

   if (addr->depth == 1) {
      cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
   } else {
      cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
                                0, 4, 0, addr->depth);
   }

   return true;
}
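/* A hedged sketch (not from the Mesa sources) of the u_upload_alloc()
 * pattern used by st_pbo_draw() above: reserve space in the upload buffer,
 * write vertex data through the returned CPU pointer, then unmap. The
 * function name is hypothetical; the seven-argument u_upload_alloc()
 * signature matches the snippet above.
 */
static bool
example_upload_quad(struct u_upload_mgr *uploader,
                    unsigned *out_offset, struct pipe_resource **out_buf)
{
   float *verts = NULL;

   /* Reserve eight floats, 4-byte aligned; on success `verts` points at
    * the mapped space and *out_offset/*out_buf describe its location. */
   u_upload_alloc(uploader, 0, 8 * sizeof(float), 4,
                  out_offset, out_buf, (void **) &verts);
   if (!verts)
      return false;

   /* Fill in a clip-space quad as a triangle strip. */
   verts[0] = -1.0f; verts[1] = -1.0f;
   verts[2] = -1.0f; verts[3] =  1.0f;
   verts[4] =  1.0f; verts[5] = -1.0f;
   verts[6] =  1.0f; verts[7] =  1.0f;

   /* Always unmap; the upload manager may use explicit flushes. */
   u_upload_unmap(uploader);
   return true;
}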