/* Set up all vertex pipeline state, rasterizer state, and fragment shader
 * constants, and issue the draw call for PBO upload/download.
 *
 * The caller is responsible for saving and restoring state, as well as for
 * setting other fragment shader state (fragment shader, samplers), and
 * framebuffer/viewport/DSA/blend state.
 */
bool
st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr,
            unsigned surface_width, unsigned surface_height)
{
   struct cso_context *cso = st->cso_context;

   /* Set up the vertex and geometry shaders */
   if (!st->pbo.vs) {
      st->pbo.vs = st_pbo_create_vs(st);
      if (!st->pbo.vs)
         return false;
   }

   if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) {
      st->pbo.gs = st_pbo_create_gs(st);
      if (!st->pbo.gs)
         return false;
   }

   cso_set_vertex_shader_handle(cso, st->pbo.vs);
   cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL);
   cso_set_tessctrl_shader_handle(cso, NULL);
   cso_set_tesseval_shader_handle(cso, NULL);

   /* Upload vertices */
   {
      struct pipe_vertex_buffer vbo;
      struct pipe_vertex_element velem;

      float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f;
      float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f;
      float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f;
      float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f;

      float *verts = NULL;

      vbo.user_buffer = NULL;
      vbo.buffer = NULL;
      vbo.stride = 2 * sizeof(float);

      u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
                     &vbo.buffer_offset, &vbo.buffer, (void **) &verts);
      if (!verts)
         return false;

      verts[0] = x0;
      verts[1] = y0;
      verts[2] = x0;
      verts[3] = y1;
      verts[4] = x1;
      verts[5] = y0;
      verts[6] = x1;
      verts[7] = y1;

      u_upload_unmap(st->uploader);

      velem.src_offset = 0;
      velem.instance_divisor = 0;
      velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(cso);
      velem.src_format = PIPE_FORMAT_R32G32_FLOAT;

      cso_set_vertex_elements(cso, 1, &velem);
      cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo);

      pipe_resource_reference(&vbo.buffer, NULL);
   }

   /* Upload constants */
   {
      struct pipe_constant_buffer cb;

      if (st->constbuf_uploader) {
         cb.buffer = NULL;
         cb.user_buffer = NULL;
         u_upload_data(st->constbuf_uploader, 0, sizeof(addr->constants),
                       st->ctx->Const.UniformBufferOffsetAlignment,
                       &addr->constants, &cb.buffer_offset, &cb.buffer);
         if (!cb.buffer)
            return false;

         u_upload_unmap(st->constbuf_uploader);
      } else {
         cb.buffer = NULL;
         cb.user_buffer = &addr->constants;
         cb.buffer_offset = 0;
      }
      cb.buffer_size = sizeof(addr->constants);

      cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb);

      pipe_resource_reference(&cb.buffer, NULL);
   }

   /* Rasterizer state */
   cso_set_rasterizer(cso, &st->pbo.raster);

   /* Disable stream output */
   cso_set_stream_outputs(cso, 0, NULL, 0);

   if (addr->depth == 1) {
      cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
   } else {
      cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
                                0, 4, 0, addr->depth);
   }

   return true;
}
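
/* A minimal standalone sketch (illustration only, not driver code) of the
 * coordinate mapping used for the quad vertices above: the destination
 * rectangle arrives in pixels, while the vertex positions must be in
 * normalized device coordinates, so each edge is mapped linearly from
 * [0, surface_size] to [-1, 1].  The helper name is hypothetical.
 */
static inline float
pbo_pixel_to_ndc(unsigned px, unsigned surface_size)
{
   /* 0 maps to -1.0f, surface_size maps to +1.0f */
   return (float) px / surface_size * 2.0f - 1.0f;
}
/* With this helper, x0 == pbo_pixel_to_ndc(addr->xoffset, surface_width)
 * and x1 == pbo_pixel_to_ndc(addr->xoffset + addr->width, surface_width).
 */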
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer)
{
   struct r600_common_context *rctx = (struct r600_common_context*)ctx;
   struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
   struct r600_resource *rbuffer = r600_resource(resource);
   uint8_t *data;

   assert(box->x + box->width <= resource->width0);

   /* See if the buffer range being mapped has never been initialized,
    * in which case it can be mapped unsynchronized. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
       usage & PIPE_TRANSFER_WRITE &&
       !util_ranges_intersect(&rbuffer->valid_buffer_range,
                              box->x, box->x + box->width)) {
      usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
   }

   /* If discarding the entire range, discard the whole resource instead. */
   if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
       box->x == 0 && box->width == resource->width0) {
      usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
   }

   if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
       !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      assert(usage & PIPE_TRANSFER_WRITE);

      /* Check if mapping this buffer would cause waiting for the GPU. */
      if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf,
                                          RADEON_USAGE_READWRITE) ||
          rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
         rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
      }
      /* At this point, the buffer is always idle. */
      usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
   }
   else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
            !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
            r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
      assert(usage & PIPE_TRANSFER_WRITE);

      /* Check if mapping this buffer would cause waiting for the GPU. */
      if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf,
                                          RADEON_USAGE_READWRITE) ||
          rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
         /* Do a wait-free write-only transfer using a temporary buffer. */
         unsigned offset;
         struct r600_resource *staging = NULL;

         u_upload_alloc(rctx->uploader, 0,
                        box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
                        &offset, (struct pipe_resource**)&staging,
                        (void**)&data);
         if (staging) {
            data += box->x % R600_MAP_BUFFER_ALIGNMENT;
            return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                            ptransfer, data, staging, offset);
         } else {
            return NULL; /* error, shouldn't occur though */
         }
      }
      /* At this point, the buffer is always idle (we checked it above). */
      usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
   }
   /* Using a staging buffer in GTT for larger reads is much faster. */
   else if ((usage & PIPE_TRANSFER_READ) &&
            !(usage & PIPE_TRANSFER_WRITE) &&
            rbuffer->domains == RADEON_DOMAIN_VRAM &&
            r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
      struct r600_resource *staging;

      staging = (struct r600_resource*) pipe_buffer_create(
            ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING,
            box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
      if (staging) {
         /* Copy the VRAM buffer to the staging buffer. */
         rctx->dma_copy(ctx, &staging->b.b, 0,
                        box->x % R600_MAP_BUFFER_ALIGNMENT,
                        0, 0, resource, level, box);

         data = r600_buffer_map_sync_with_rings(rctx, staging,
                                                PIPE_TRANSFER_READ);
         data += box->x % R600_MAP_BUFFER_ALIGNMENT;

         return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                         ptransfer, data, staging, 0);
      }
   }

   data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
   if (!data) {
      return NULL;
   }
   data += box->x;

   return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                   ptransfer, data, NULL, 0);
}
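
/* A standalone sketch (names hypothetical) of the alignment trick used in
 * both staging paths above: the staging buffer is over-allocated by
 * box->x modulo R600_MAP_BUFFER_ALIGNMENT, and the mapped pointer is
 * advanced by that same amount, so the returned CPU pointer keeps the same
 * sub-alignment as the original buffer offset while the copy itself can
 * start on an aligned boundary.
 */
static inline uint8_t *
staging_ptr_for_box(uint8_t *staging_map, unsigned box_x, unsigned map_align)
{
   /* The staging allocation is box->width + (box_x % map_align) bytes;
    * skipping the first box_x % map_align bytes lines the data up with
    * the original offset within the buffer.
    */
   return staging_map + box_x % map_align;
}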
static enum pipe_error
emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
{
   const struct pipe_constant_buffer *cbuf;
   struct pipe_resource *dst_buffer = NULL;
   enum pipe_error ret = PIPE_OK;
   struct pipe_transfer *src_transfer;
   struct svga_winsys_surface *dst_handle;
   float extras[MAX_EXTRA_CONSTS][4];
   unsigned extra_count, extra_size, extra_offset;
   unsigned new_buf_size;
   void *src_map = NULL, *dst_map;
   unsigned offset;
   const struct svga_shader_variant *variant;

   assert(shader == PIPE_SHADER_VERTEX ||
          shader == PIPE_SHADER_GEOMETRY ||
          shader == PIPE_SHADER_FRAGMENT);

   cbuf = &svga->curr.constbufs[shader][0];

   switch (shader) {
   case PIPE_SHADER_VERTEX:
      variant = svga->state.hw_draw.vs;
      extra_count = svga_get_extra_vs_constants(svga, (float *) extras);
      break;
   case PIPE_SHADER_FRAGMENT:
      variant = svga->state.hw_draw.fs;
      extra_count = svga_get_extra_fs_constants(svga, (float *) extras);
      break;
   case PIPE_SHADER_GEOMETRY:
      variant = svga->state.hw_draw.gs;
      extra_count = svga_get_extra_gs_constants(svga, (float *) extras);
      break;
   default:
      assert(!"Unexpected shader type");
      /* Don't return an error code since we don't want to keep re-trying
       * this function and getting stuck in an infinite loop.
       */
      return PIPE_OK;
   }

   assert(variant);

   /* Compute extra constants size and offset in bytes */
   extra_size = extra_count * 4 * sizeof(float);
   extra_offset = 4 * sizeof(float) * variant->extra_const_start;

   if (cbuf->buffer_size + extra_size == 0)
      return PIPE_OK;  /* nothing to do */

   /* Typically, the cbuf->buffer here is a user-space buffer so mapping
    * it is really cheap.  If we ever get real HW buffers for constants
    * we should avoid mapping and instead use a ResourceCopy command.
    */
   if (cbuf->buffer_size > 0) {
      src_map = pipe_buffer_map_range(&svga->pipe, cbuf->buffer,
                                      cbuf->buffer_offset, cbuf->buffer_size,
                                      PIPE_TRANSFER_READ, &src_transfer);
      assert(src_map);
      if (!src_map) {
         return PIPE_ERROR_OUT_OF_MEMORY;
      }
   }

   /* The new/dest buffer's size must be large enough to hold the original,
    * user-specified constants, plus the extra constants.
    * The size of the original constant buffer _should_ agree with what the
    * shader is expecting, but it might not (it's not enforced anywhere by
    * gallium).
    */
   new_buf_size = MAX2(cbuf->buffer_size, extra_offset) + extra_size;

   /* According to the DX10 spec, the constant buffer size must be
    * in multiples of 16.
    */
   new_buf_size = align(new_buf_size, 16);

   u_upload_alloc(svga->const0_upload, 0, new_buf_size, &offset,
                  &dst_buffer, &dst_map);
   if (!dst_map) {
      if (src_map)
         pipe_buffer_unmap(&svga->pipe, src_transfer);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   if (src_map) {
      memcpy(dst_map, src_map, cbuf->buffer_size);
      pipe_buffer_unmap(&svga->pipe, src_transfer);
   }

   if (extra_size) {
      assert(extra_offset + extra_size <= new_buf_size);
      memcpy((char *) dst_map + extra_offset, extras, extra_size);
   }
   u_upload_unmap(svga->const0_upload);

   /* Issue the SetSingleConstantBuffer command */
   dst_handle = svga_buffer_handle(svga, dst_buffer);
   if (!dst_handle) {
      pipe_resource_reference(&dst_buffer, NULL);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   assert(new_buf_size % 16 == 0);
   ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                               0, /* index */
                                               svga_shader_type(shader),
                                               dst_handle,
                                               offset,
                                               new_buf_size);
   if (ret != PIPE_OK) {
      pipe_resource_reference(&dst_buffer, NULL);
      return ret;
   }

   /* Save this const buffer until it's replaced in the future.
    * Otherwise, all references to the buffer will go away after the
    * command buffer is submitted, it'll get recycled and we will have
    * incorrect constant buffer bindings.
    */
   pipe_resource_reference(&svga->state.hw_draw.constbuf[shader], dst_buffer);
   svga->state.hw_draw.default_constbuf_size[shader] = new_buf_size;

   pipe_resource_reference(&dst_buffer, NULL);

   return ret;
}
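
/* The align(new_buf_size, 16) call above rounds the constant buffer size up
 * to the 16-byte multiple DX10 requires.  A minimal sketch of such a
 * power-of-two rounding helper (Mesa's own align() lives in util/u_math.h;
 * this is just the usual bit trick and assumes `a` is a power of two):
 */
static inline unsigned
align_pot(unsigned value, unsigned a)
{
   /* e.g. align_pot(52, 16) == 64 and align_pot(64, 16) == 64 */
   return (value + a - 1) & ~(a - 1);
}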
static void
setup_bitmap_vertex_data(struct st_context *st, bool normalized,
                         int x, int y, int width, int height,
                         float z, const float color[4],
                         struct pipe_resource **vbuf,
                         unsigned *vbuf_offset)
{
   const GLfloat fb_width = (GLfloat)st->state.framebuffer.width;
   const GLfloat fb_height = (GLfloat)st->state.framebuffer.height;
   const GLfloat x0 = (GLfloat)x;
   const GLfloat x1 = (GLfloat)(x + width);
   const GLfloat y0 = (GLfloat)y;
   const GLfloat y1 = (GLfloat)(y + height);
   GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
   GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
   const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
   const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
   const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
   const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
   GLuint i;
   float (*vertices)[3][4];  /**< vertex pos + color + texcoord */

   if (!normalized) {
      sRight = (GLfloat) width;
      tBot = (GLfloat) height;
   }

   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
                      vbuf_offset, vbuf, (void **) &vertices) != PIPE_OK) {
      return;
   }

   /* Positions are in clip coords since we need to do clipping in case
    * the bitmap quad goes beyond the window bounds.
    */
   vertices[0][0][0] = clip_x0;
   vertices[0][0][1] = clip_y0;
   vertices[0][2][0] = sLeft;
   vertices[0][2][1] = tTop;

   vertices[1][0][0] = clip_x1;
   vertices[1][0][1] = clip_y0;
   vertices[1][2][0] = sRight;
   vertices[1][2][1] = tTop;

   vertices[2][0][0] = clip_x1;
   vertices[2][0][1] = clip_y1;
   vertices[2][2][0] = sRight;
   vertices[2][2][1] = tBot;

   vertices[3][0][0] = clip_x0;
   vertices[3][0][1] = clip_y1;
   vertices[3][2][0] = sLeft;
   vertices[3][2][1] = tBot;

   /* same for all verts: */
   for (i = 0; i < 4; i++) {
      vertices[i][0][2] = z;
      vertices[i][0][3] = 1.0f;
      vertices[i][1][0] = color[0];
      vertices[i][1][1] = color[1];
      vertices[i][1][2] = color[2];
      vertices[i][1][3] = color[3];
      vertices[i][2][2] = 0.0; /*R*/
      vertices[i][2][3] = 1.0; /*Q*/
   }

   u_upload_unmap(st->uploader);
}
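
/* Illustration only: a hypothetical struct view of the
 * float (*vertices)[3][4] layout filled in above.  Each vertex is three
 * float[4] attributes laid out back to back, so the four vertices occupy
 * 4 * sizeof(struct bitmap_vertex) == 192 bytes with a 48-byte stride.
 */
struct bitmap_vertex {
   float pos[4];      /* clip-space x, y, z, w */
   float color[4];    /* r, g, b, a */
   float texcoord[4]; /* s, t, r, q */
};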
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
   struct si_descriptors *desc = &sctx->vertex_buffers;
   bool bound[SI_NUM_VERTEX_BUFFERS] = {};
   unsigned i, count;
   uint64_t va;
   uint32_t *ptr;

   if (!sctx->vertex_buffers_dirty || !sctx->vertex_elements)
      return true;

   count = sctx->vertex_elements->count;
   if (!count)
      return true;

   /* Vertex buffer descriptors are the only ones which are uploaded
    * directly through a staging buffer and don't go through
    * the fine-grained upload path.
    */
   u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
                  (struct pipe_resource**)&desc->buffer, (void**)&ptr);
   if (!desc->buffer)
      return false;

   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                             desc->buffer, RADEON_USAGE_READ,
                             RADEON_PRIO_DESCRIPTORS);

   assert(count <= SI_NUM_VERTEX_BUFFERS);

   for (i = 0; i < count; i++) {
      struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
      struct pipe_vertex_buffer *vb;
      struct r600_resource *rbuffer;
      unsigned offset;
      /* Four dwords of descriptor output per element; this shadows the
       * outer `desc` variable within the loop body. */
      uint32_t *desc = &ptr[i*4];

      if (ve->vertex_buffer_index >= Elements(sctx->vertex_buffer)) {
         memset(desc, 0, 16);
         continue;
      }

      vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
      rbuffer = (struct r600_resource*)vb->buffer;
      if (!rbuffer) {
         memset(desc, 0, 16);
         continue;
      }

      offset = vb->buffer_offset + ve->src_offset;
      va = rbuffer->gpu_address + offset;

      /* Fill in T# buffer resource description */
      desc[0] = va;
      desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                S_008F04_STRIDE(vb->stride);

      if (sctx->b.chip_class <= CIK && vb->stride)
         /* Round up by rounding down and adding 1 */
         desc[2] = (vb->buffer->width0 - offset -
                    sctx->vertex_elements->format_size[i]) /
                   vb->stride + 1;
      else
         desc[2] = vb->buffer->width0 - offset;

      desc[3] = sctx->vertex_elements->rsrc_word3[i];

      if (!bound[ve->vertex_buffer_index]) {
         radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                   (struct r600_resource*)vb->buffer,
                                   RADEON_USAGE_READ,
                                   RADEON_PRIO_VERTEX_BUFFER);
         bound[ve->vertex_buffer_index] = true;
      }
   }

   /* Don't flush the const cache. It would have a very negative effect
    * on performance (confirmed by testing). New descriptors are always
    * uploaded to a fresh new buffer, so I don't think flushing the const
    * cache is needed.
    */
   desc->pointer_dirty = true;
   si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
   sctx->vertex_buffers_dirty = false;
   return true;
}
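
/* A standalone sketch (names hypothetical) of the CIK num_records
 * computation above.  The hardware counts records of `stride` bytes, but
 * the last vertex only needs `format_size` readable bytes rather than a
 * whole stride, hence "round up by rounding down and adding 1":
 */
static inline unsigned
vb_num_records(unsigned buffer_size, unsigned offset,
               unsigned format_size, unsigned stride)
{
   /* e.g. buffer_size = 100, offset = 0, format_size = 12, stride = 16:
    * (100 - 0 - 12) / 16 + 1 == 6 records; the 6th starts at byte 80 and
    * its 12 bytes still fit, while a 7th starting at byte 96 would not.
    */
   return (buffer_size - offset - format_size) / stride + 1;
}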