static int emit_const( struct svga_context *svga, int unit, int i, const float *value ) { int ret = PIPE_OK; if (memcmp(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float)) != 0) { if (SVGA_DEBUG & DEBUG_CONSTS) debug_printf("%s %s %d: %f %f %f %f\n", __FUNCTION__, unit == PIPE_SHADER_VERTEX ? "VERT" : "FRAG", i, value[0], value[1], value[2], value[3]); ret = SVGA3D_SetShaderConst( svga->swc, i, svga_shader_type(unit), SVGA3D_CONST_TYPE_FLOAT, value ); if (ret) return ret; memcpy(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float)); } return ret; }
/** * Check and emit one shader constant register. * \param shader PIPE_SHADER_FRAGMENT or PIPE_SHADER_VERTEX * \param i which float[4] constant to change * \param value the new float[4] value */ static enum pipe_error emit_const(struct svga_context *svga, unsigned shader, unsigned i, const float *value) { enum pipe_error ret = PIPE_OK; assert(shader < PIPE_SHADER_TYPES); assert(i < SVGA3D_CONSTREG_MAX); if (memcmp(svga->state.hw_draw.cb[shader][i], value, 4 * sizeof(float)) != 0) { if (SVGA_DEBUG & DEBUG_CONSTS) debug_printf("%s %s %u: %f %f %f %f\n", __FUNCTION__, shader == PIPE_SHADER_VERTEX ? "VERT" : "FRAG", i, value[0], value[1], value[2], value[3]); ret = SVGA3D_SetShaderConst( svga->swc, i, svga_shader_type(shader), SVGA3D_CONST_TYPE_FLOAT, value ); if (ret != PIPE_OK) return ret; memcpy(svga->state.hw_draw.cb[shader][i], value, 4 * sizeof(float)); } return ret; }
static enum pipe_error emit_consts_vgpu10(struct svga_context *svga, unsigned shader) { enum pipe_error ret; unsigned dirty_constbufs; unsigned enabled_constbufs; /* Emit 0th constant buffer (with extra constants) */ ret = emit_constbuf_vgpu10(svga, shader); if (ret != PIPE_OK) { return ret; } enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u; /* Emit other constant buffers (UBOs) */ dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u; while (dirty_constbufs) { unsigned index = u_bit_scan(&dirty_constbufs); unsigned offset = svga->curr.constbufs[shader][index].buffer_offset; unsigned size = svga->curr.constbufs[shader][index].buffer_size; struct svga_buffer *buffer = svga_buffer(svga->curr.constbufs[shader][index].buffer); struct svga_winsys_surface *handle; if (buffer) { handle = svga_buffer_handle(svga, &buffer->b.b); enabled_constbufs |= 1 << index; } else { handle = NULL; enabled_constbufs &= ~(1 << index); assert(offset == 0); assert(size == 0); } assert(size % 16 == 0); ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, index, svga_shader_type(shader), handle, offset, size); if (ret != PIPE_OK) return ret; } svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs; svga->state.dirty_constbufs[shader] = 0; return ret; }
/* * Check and emit a range of shader constant registers, trying to coalesce * successive shader constant updates in a single command in order to save * space on the command buffer. This is a HWv8 feature. */ static enum pipe_error emit_const_range(struct svga_context *svga, unsigned shader, unsigned offset, unsigned count, const float (*values)[4]) { unsigned i, j; enum pipe_error ret; #ifdef DEBUG if (offset + count > SVGA3D_CONSTREG_MAX) { debug_printf("svga: too many constants (offset + count = %u)\n", offset + count); } #endif if (offset > SVGA3D_CONSTREG_MAX) { /* This isn't OK, but if we propagate an error all the way up we'll * just get into more trouble. * XXX note that offset is always zero at this time so this is moot. */ return PIPE_OK; } if (offset + count > SVGA3D_CONSTREG_MAX) { /* Just drop the extra constants for now. * Ideally we should not have allowed the app to create a shader * that exceeds our constant buffer size but there's no way to * express that in gallium at this time. */ count = SVGA3D_CONSTREG_MAX - offset; } i = 0; while (i < count) { if (memcmp(svga->state.hw_draw.cb[shader][offset + i], values[i], 4 * sizeof(float)) != 0) { /* Found one dirty constant */ if (SVGA_DEBUG & DEBUG_CONSTS) debug_printf("%s %s %d: %f %f %f %f\n", __FUNCTION__, shader == PIPE_SHADER_VERTEX ? "VERT" : "FRAG", offset + i, values[i][0], values[i][1], values[i][2], values[i][3]); /* Look for more consecutive dirty constants. */ j = i + 1; while (j < count && j < i + MAX_CONST_REG_COUNT && memcmp(svga->state.hw_draw.cb[shader][offset + j], values[j], 4 * sizeof(float)) != 0) { if (SVGA_DEBUG & DEBUG_CONSTS) debug_printf("%s %s %d: %f %f %f %f\n", __FUNCTION__, shader == PIPE_SHADER_VERTEX ? "VERT" : "FRAG", offset + j, values[j][0], values[j][1], values[j][2], values[j][3]); ++j; } assert(j >= i + 1); /* Send them all together. */ ret = SVGA3D_SetShaderConsts(svga->swc, offset + i, j - i, svga_shader_type(shader), SVGA3D_CONST_TYPE_FLOAT, values + i); if (ret != PIPE_OK) { return ret; } /* * Local copy of the hardware state. */ memcpy(svga->state.hw_draw.cb[shader][offset + i], values[i], (j - i) * 4 * sizeof(float)); i = j + 1; } else { ++i; } } return PIPE_OK; }
static enum pipe_error emit_consts_vgpu10(struct svga_context *svga, unsigned shader) { enum pipe_error ret; unsigned dirty_constbufs; unsigned enabled_constbufs; /* Emit 0th constant buffer (with extra constants) */ ret = emit_constbuf_vgpu10(svga, shader); if (ret != PIPE_OK) { return ret; } enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u; /* Emit other constant buffers (UBOs) */ dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u; while (dirty_constbufs) { unsigned index = u_bit_scan(&dirty_constbufs); unsigned offset = svga->curr.constbufs[shader][index].buffer_offset; unsigned size = svga->curr.constbufs[shader][index].buffer_size; struct svga_buffer *buffer = svga_buffer(svga->curr.constbufs[shader][index].buffer); struct svga_winsys_surface *handle; if (buffer) { handle = svga_buffer_handle(svga, &buffer->b.b); enabled_constbufs |= 1 << index; } else { handle = NULL; enabled_constbufs &= ~(1 << index); assert(offset == 0); assert(size == 0); } if (size % 16 != 0) { /* GL's buffer range sizes can be any number of bytes but the * SVGA3D device requires a multiple of 16 bytes. */ const unsigned total_size = buffer->b.b.width0; if (offset + align(size, 16) <= total_size) { /* round up size to multiple of 16 */ size = align(size, 16); } else { /* round down to mulitple of 16 (this may cause rendering problems * but should avoid a device error). */ size &= ~15; } } assert(size % 16 == 0); ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, index, svga_shader_type(shader), handle, offset, size); if (ret != PIPE_OK) return ret; svga->hud.num_const_buf_updates++; } svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs; svga->state.dirty_constbufs[shader] = 0; return ret; }
static enum pipe_error emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader) { const struct pipe_constant_buffer *cbuf; struct pipe_resource *dst_buffer = NULL; enum pipe_error ret = PIPE_OK; struct pipe_transfer *src_transfer; struct svga_winsys_surface *dst_handle; float extras[MAX_EXTRA_CONSTS][4]; unsigned extra_count, extra_size, extra_offset; unsigned new_buf_size; void *src_map = NULL, *dst_map; unsigned offset; const struct svga_shader_variant *variant; unsigned alloc_buf_size; assert(shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY || shader == PIPE_SHADER_FRAGMENT); cbuf = &svga->curr.constbufs[shader][0]; switch (shader) { case PIPE_SHADER_VERTEX: variant = svga->state.hw_draw.vs; extra_count = svga_get_extra_vs_constants(svga, (float *) extras); break; case PIPE_SHADER_FRAGMENT: variant = svga->state.hw_draw.fs; extra_count = svga_get_extra_fs_constants(svga, (float *) extras); break; case PIPE_SHADER_GEOMETRY: variant = svga->state.hw_draw.gs; extra_count = svga_get_extra_gs_constants(svga, (float *) extras); break; default: assert(!"Unexpected shader type"); /* Don't return an error code since we don't want to keep re-trying * this function and getting stuck in an infinite loop. */ return PIPE_OK; } assert(variant); /* Compute extra constants size and offset in bytes */ extra_size = extra_count * 4 * sizeof(float); extra_offset = 4 * sizeof(float) * variant->extra_const_start; if (cbuf->buffer_size + extra_size == 0) return PIPE_OK; /* nothing to do */ /* Typically, the cbuf->buffer here is a user-space buffer so mapping * it is really cheap. If we ever get real HW buffers for constants * we should void mapping and instead use a ResourceCopy command. */ if (cbuf->buffer_size > 0) { src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer, cbuf->buffer_offset, cbuf->buffer_size, PIPE_TRANSFER_READ, &src_transfer); assert(src_map); if (!src_map) { return PIPE_ERROR_OUT_OF_MEMORY; } } /* The new/dest buffer's size must be large enough to hold the original, * user-specified constants, plus the extra constants. * The size of the original constant buffer _should_ agree with what the * shader is expecting, but it might not (it's not enforced anywhere by * gallium). */ new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size; /* According to the DX10 spec, the constant buffer size must be * in multiples of 16. */ new_buf_size = align(new_buf_size, 16); /* Constant buffer size in the upload buffer must be in multiples of 256. * In order to maximize the chance of merging the upload buffer chunks * when svga_buffer_add_range() is called, * the allocate buffer size needs to be in multiples of 256 as well. * Otherwise, since there is gap between each dirty range of the upload buffer, * each dirty range will end up in its own UPDATE_GB_IMAGE command. */ alloc_buf_size = align(new_buf_size, CONST0_UPLOAD_ALIGNMENT); u_upload_alloc(svga->const0_upload, 0, alloc_buf_size, CONST0_UPLOAD_ALIGNMENT, &offset, &dst_buffer, &dst_map); if (!dst_map) { if (src_map) pipe_buffer_unmap(&svga->pipe, src_transfer); return PIPE_ERROR_OUT_OF_MEMORY; } if (src_map) { memcpy(dst_map, src_map, cbuf->buffer_size); pipe_buffer_unmap(&svga->pipe, src_transfer); } if (extra_size) { assert(extra_offset + extra_size <= new_buf_size); memcpy((char *) dst_map + extra_offset, extras, extra_size); } u_upload_unmap(svga->const0_upload); /* Issue the SetSingleConstantBuffer command */ dst_handle = svga_buffer_handle(svga, dst_buffer); if (!dst_handle) { pipe_resource_reference(&dst_buffer, NULL); return PIPE_ERROR_OUT_OF_MEMORY; } assert(new_buf_size % 16 == 0); ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc, 0, /* index */ svga_shader_type(shader), dst_handle, offset, new_buf_size); if (ret != PIPE_OK) { pipe_resource_reference(&dst_buffer, NULL); return ret; } /* Save this const buffer until it's replaced in the future. * Otherwise, all references to the buffer will go away after the * command buffer is submitted, it'll get recycled and we will have * incorrect constant buffer bindings. */ pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer); svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size; pipe_resource_reference(&dst_buffer, NULL); svga->hud.num_const_buf_updates++; return ret; }
static enum pipe_error update_samplers(struct svga_context *svga, unsigned dirty ) { enum pipe_error ret = PIPE_OK; enum pipe_shader_type shader; if (!svga_have_vgpu10(svga)) return PIPE_OK; for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { const unsigned count = svga->curr.num_samplers[shader]; SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS]; unsigned i; unsigned nsamplers; for (i = 0; i < count; i++) { bool fs_shadow = false; /* _NEW_FS */ if (shader == PIPE_SHADER_FRAGMENT) { struct svga_shader_variant *fs = svga->state.hw_draw.fs; /* If the fragment shader is doing the shadow comparison * for this texture unit, don't enable shadow compare in * the texture sampler state. */ if (fs && (fs->fs_shadow_compare_units & (1 << i))) { fs_shadow = true; } } if (svga->curr.sampler[shader][i]) { ids[i] = svga->curr.sampler[shader][i]->id[fs_shadow]; assert(ids[i] != SVGA3D_INVALID_ID); } else { ids[i] = SVGA3D_INVALID_ID; } } for (; i < svga->state.hw_draw.num_samplers[shader]; i++) { ids[i] = SVGA3D_INVALID_ID; } nsamplers = MAX2(svga->state.hw_draw.num_samplers[shader], count); if (nsamplers > 0) { if (count != svga->state.hw_draw.num_samplers[shader] || memcmp(ids, svga->state.hw_draw.samplers[shader], count * sizeof(ids[0])) != 0) { /* HW state is really changing */ ret = SVGA3D_vgpu10_SetSamplers(svga->swc, nsamplers, 0, /* start */ svga_shader_type(shader), /* type */ ids); if (ret != PIPE_OK) return ret; memcpy(svga->state.hw_draw.samplers[shader], ids, nsamplers * sizeof(ids[0])); svga->state.hw_draw.num_samplers[shader] = count; } } } /* Handle polygon stipple sampler texture */ if (svga->curr.rast->templ.poly_stipple_enable) { const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; struct svga_sampler_state *sampler = svga->polygon_stipple.sampler; assert(sampler); if (!sampler) { return PIPE_OK; /* probably out of memory */ } if (svga->state.hw_draw.samplers[PIPE_SHADER_FRAGMENT][unit] != sampler->id[0]) { ret = SVGA3D_vgpu10_SetSamplers(svga->swc, 1, /* count */ unit, /* start */ SVGA3D_SHADERTYPE_PS, &sampler->id[0]); if (ret != PIPE_OK) return ret; /* save the polygon stipple sampler in the hw draw state */ svga->state.hw_draw.samplers[PIPE_SHADER_FRAGMENT][unit] = sampler->id[0]; } } return ret; }
static enum pipe_error update_sampler_resources(struct svga_context *svga, unsigned dirty) { enum pipe_error ret = PIPE_OK; enum pipe_shader_type shader; if (!svga_have_vgpu10(svga)) return PIPE_OK; for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) { SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS]; struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; unsigned count; unsigned nviews; unsigned i; count = svga->curr.num_sampler_views[shader]; for (i = 0; i < count; i++) { struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(svga->curr.sampler_views[shader][i]); if (sv) { surfaces[i] = svga_resource_handle(sv->base.texture); ret = svga_validate_pipe_sampler_view(svga, sv); if (ret != PIPE_OK) return ret; assert(sv->id != SVGA3D_INVALID_ID); ids[i] = sv->id; sampler_views[i] = &sv->base; } else { surfaces[i] = NULL; ids[i] = SVGA3D_INVALID_ID; sampler_views[i] = NULL; } } for (; i < svga->state.hw_draw.num_sampler_views[shader]; i++) { ids[i] = SVGA3D_INVALID_ID; surfaces[i] = NULL; sampler_views[i] = NULL; } /* Number of ShaderResources that need to be modified. This includes * the one that need to be unbound. */ nviews = MAX2(svga->state.hw_draw.num_sampler_views[shader], count); if (nviews > 0) { if (count != svga->state.hw_draw.num_sampler_views[shader] || memcmp(sampler_views, svga->state.hw_draw.sampler_views[shader], count * sizeof(sampler_views[0])) != 0) { SVGA3dShaderResourceViewId *pIds = ids; struct svga_winsys_surface **pSurf = surfaces; unsigned numSR = 0; /* Loop through the sampler view list to only emit * the sampler views that are not already in the * corresponding entries in the device's * shader resource list. */ for (i = 0; i < nviews; i++) { boolean emit; emit = sampler_views[i] == svga->state.hw_draw.sampler_views[shader][i]; if (!emit && i == nviews-1) { /* Include the last sampler view in the next emit * if it is different. */ emit = TRUE; numSR++; i++; } if (emit) { /* numSR can only be 0 if the first entry of the list * is the same as the one in the device list. * In this case, * there is nothing to send yet. */ if (numSR) { ret = SVGA3D_vgpu10_SetShaderResources( svga->swc, svga_shader_type(shader), i - numSR, /* startView */ numSR, pIds, pSurf); if (ret != PIPE_OK) return ret; } pIds += (numSR + 1); pSurf += (numSR + 1); numSR = 0; } else numSR++; } /* Save referenced sampler views in the hw draw state. */ svga->state.hw_draw.num_sampler_views[shader] = count; for (i = 0; i < nviews; i++) { pipe_sampler_view_reference( &svga->state.hw_draw.sampler_views[shader][i], sampler_views[i]); } } } } /* Handle polygon stipple sampler view */ if (svga->curr.rast->templ.poly_stipple_enable) { const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit; struct svga_pipe_sampler_view *sv = svga->polygon_stipple.sampler_view; struct svga_winsys_surface *surface; assert(sv); if (!sv) { return PIPE_OK; /* probably out of memory */ } ret = svga_validate_pipe_sampler_view(svga, sv); if (ret != PIPE_OK) return ret; surface = svga_resource_handle(sv->base.texture); ret = SVGA3D_vgpu10_SetShaderResources( svga->swc, svga_shader_type(PIPE_SHADER_FRAGMENT), unit, /* startView */ 1, &sv->id, &surface); } return ret; }