int main(int argc, char **argv) { struct fbdemos_scaffold *fbs = 0; fbdemo_init(&fbs); int width = fbs->width; int height = fbs->height; struct pipe_context *pipe = fbs->pipe; /* resources */ struct pipe_resource *rt_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_RENDER_TARGET, PIPE_FORMAT_B8G8R8X8_UNORM, width, height, 0); struct pipe_resource *z_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_RENDER_TARGET, PIPE_FORMAT_Z16_UNORM, width, height, 0); struct pipe_resource *vtx_resource = pipe_buffer_create(fbs->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE, VERTEX_BUFFER_SIZE); struct pipe_resource *idx_resource = pipe_buffer_create(fbs->screen, PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_IMMUTABLE, VERTEX_BUFFER_SIZE); /* bind render target to framebuffer */ etna_fb_bind_resource(&fbs->fb, rt_resource); /* Phew, now we got all the memory we need. * Write interleaved attribute vertex stream. * Unlike the GL example we only do this once, not every time glDrawArrays is called, the same would be accomplished * from GL by using a vertex buffer object. 
*/ float *vVertices; float *vNormals; float *vTexCoords; uint16_t *vIndices; int numVertices = 0; int numIndices = esGenSphere(80, 1.0f, &vVertices, &vNormals, &vTexCoords, &vIndices, &numVertices); unsigned vtxStride = 3+3+2; assert((numVertices * vtxStride*4) < VERTEX_BUFFER_SIZE); struct pipe_transfer *vtx_transfer = 0; float *vtx_logical = pipe_buffer_map(pipe, vtx_resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED, &vtx_transfer); for(int vert=0; vert<numVertices; ++vert) { int dest_idx = vert * vtxStride; for(int comp=0; comp<3; ++comp) vtx_logical[dest_idx+comp+0] = vVertices[vert*3 + comp]; /* 0 */ for(int comp=0; comp<3; ++comp) vtx_logical[dest_idx+comp+3] = vNormals[vert*3 + comp]; /* 1 */ for(int comp=0; comp<2; ++comp) vtx_logical[dest_idx+comp+6] = vTexCoords[vert*2 + comp]; /* 2 */ } pipe_buffer_unmap(pipe, vtx_transfer); assert((numIndices * 2) < VERTEX_BUFFER_SIZE); struct pipe_transfer *idx_transfer = 0; void *idx_logical = pipe_buffer_map(pipe, idx_resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED, &idx_transfer); memcpy(idx_logical, vIndices, numIndices*sizeof(uint16_t)); pipe_buffer_unmap(pipe, idx_transfer); /* compile gallium3d states */ void *blend = pipe->create_blend_state(pipe, &(struct pipe_blend_state) { .rt[0] = { .blend_enable = 0, .rgb_func = PIPE_BLEND_ADD, .rgb_src_factor = PIPE_BLENDFACTOR_ONE, .rgb_dst_factor = PIPE_BLENDFACTOR_ZERO, .alpha_func = PIPE_BLEND_ADD, .alpha_src_factor = PIPE_BLENDFACTOR_ONE, .alpha_dst_factor = PIPE_BLENDFACTOR_ZERO, .colormask = 0xf } });
/**
 * Create an r600 pipe_context for the given screen.
 *
 * Allocates the context, wires up base vtable entries, initializes
 * chip-class-specific state/atom tables, creates the gfx command stream
 * and the blitter, and binds a dummy fragment shader.  On any failure
 * the partially built context is torn down via r600_destroy_context().
 *
 * \param screen  owning pipe_screen (actually a struct r600_screen)
 * \param priv    must be NULL (asserted); slot is reused internally
 * \param flags   context-creation flags forwarded to common init
 * \return the new pipe_context, or NULL on failure
 */
static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
{
	struct r600_context *rctx = CALLOC_STRUCT(r600_context);
	struct r600_screen* rscreen = (struct r600_screen *)screen;
	struct radeon_winsys *ws = rscreen->b.ws;

	if (!rctx)
		return NULL;

	rctx->b.b.screen = screen;
	assert(!priv);
	rctx->b.b.priv = NULL; /* for threaded_context_unwrap_sync */
	rctx->b.b.destroy = r600_destroy_context;
	rctx->b.set_atom_dirty = (void *)r600_set_atom_dirty;

	if (!r600_common_context_init(&rctx->b, &rscreen->b, flags))
		goto fail;

	rctx->screen = rscreen;
	LIST_INITHEAD(&rctx->texture_buffers);

	r600_init_blit_functions(rctx);

	/* Use the UVD hardware decoder when available, else fall back to the
	 * generic (shader-based) video pipeline. */
	if (rscreen->b.info.has_hw_decode) {
		rctx->b.b.create_video_codec = r600_uvd_create_decoder;
		rctx->b.b.create_video_buffer = r600_video_buffer_create;
	} else {
		rctx->b.b.create_video_codec = vl_create_decoder;
		rctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	/* Env-var toggle for trace-ID debugging (see r600_begin_new_cs). */
	if (getenv("R600_TRACE"))
		rctx->is_debug = true;
	r600_init_common_state_functions(rctx);

	/* Chip-class-specific state functions, start-of-CS command buffers and
	 * custom blend/DSA states used by internal blits. */
	switch (rctx->b.chip_class) {
	case R600:
	case R700:
		r600_init_state_functions(rctx);
		r600_init_atom_start_cs(rctx);
		rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx);
		rctx->custom_blend_resolve = rctx->b.chip_class == R700 ?
			r700_create_resolve_blend(rctx) :
			r600_create_resolve_blend(rctx);
		rctx->custom_blend_decompress = r600_create_decompress_blend(rctx);
		/* These families lack a vertex cache. */
		rctx->has_vertex_cache = !(rctx->b.family == CHIP_RV610 ||
					   rctx->b.family == CHIP_RV620 ||
					   rctx->b.family == CHIP_RS780 ||
					   rctx->b.family == CHIP_RS880 ||
					   rctx->b.family == CHIP_RV710);
		break;
	case EVERGREEN:
	case CAYMAN:
		evergreen_init_state_functions(rctx);
		evergreen_init_atom_start_cs(rctx);
		evergreen_init_atom_start_compute_cs(rctx);
		rctx->custom_dsa_flush = evergreen_create_db_flush_dsa(rctx);
		rctx->custom_blend_resolve = evergreen_create_resolve_blend(rctx);
		rctx->custom_blend_decompress = evergreen_create_decompress_blend(rctx);
		rctx->custom_blend_fastclear = evergreen_create_fastclear_blend(rctx);
		rctx->has_vertex_cache = !(rctx->b.family == CHIP_CEDAR ||
					   rctx->b.family == CHIP_PALM ||
					   rctx->b.family == CHIP_SUMO ||
					   rctx->b.family == CHIP_SUMO2 ||
					   rctx->b.family == CHIP_CAICOS ||
					   rctx->b.family == CHIP_CAYMAN ||
					   rctx->b.family == CHIP_ARUBA);
		/* Small scratch buffer used for fence/append bookkeeping. */
		rctx->append_fence = pipe_buffer_create(rctx->b.b.screen,
							PIPE_BIND_CUSTOM,
							PIPE_USAGE_DEFAULT, 32);
		break;
	default:
		R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class);
		goto fail;
	}

	rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
				       r600_context_gfx_flush, rctx);
	rctx->b.gfx.flush = r600_context_gfx_flush;

	/* Suballocator for small fetch-shader allocations. */
	rctx->allocator_fetch_shader =
		u_suballocator_create(&rctx->b.b, 64 * 1024, 0,
				      PIPE_USAGE_DEFAULT, 0, FALSE);
	if (!rctx->allocator_fetch_shader)
		goto fail;

	rctx->isa = calloc(1, sizeof(struct r600_isa));
	if (!rctx->isa || r600_isa_init(rctx, rctx->isa))
		goto fail;

	/* Debug option: route all resource copies through the DMA engine. */
	if (rscreen->b.debug_flags & DBG_FORCE_DMA)
		rctx->b.b.resource_copy_region = rctx->b.dma_copy;

	rctx->blitter = util_blitter_create(&rctx->b.b);
	if (rctx->blitter == NULL)
		goto fail;
	util_blitter_set_texture_multisample(rctx->blitter, rscreen->has_msaa);
	rctx->blitter->draw_rectangle = r600_draw_rectangle;

	/* Emits the start-of-CS state; must come after state setup above. */
	r600_begin_new_cs(rctx);

	rctx->dummy_pixel_shader =
		util_make_fragment_cloneinput_shader(&rctx->b.b, 0,
						     TGSI_SEMANTIC_GENERIC,
						     TGSI_INTERPOLATE_CONSTANT);
	rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);

	return &rctx->b.b;

fail:
	r600_destroy_context(&rctx->b.b);
	return NULL;
}
/**
 * Create a radeonsi pipe_context for the given screen.
 *
 * Builds the context in a strict order: base fields first, common/blit/
 * compute init, command stream and descriptors, chip-specific state,
 * then the first CS begin and backend-mask query last (they emit
 * commands).  On failure everything is undone via si_destroy_context().
 *
 * \param screen  owning pipe_screen (actually a struct si_screen)
 * \param priv    opaque pointer stored in the context, not interpreted here
 * \return the new pipe_context, or NULL on failure
 */
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	int shader, i;

	if (sctx == NULL)
		return NULL;

	sctx->b.b.screen = screen; /* this must be set first */
	sctx->b.b.priv = priv;
	sctx->b.b.destroy = si_destroy_context;
	sctx->b.b.flush = si_flush_from_st;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */

	if (!r600_common_context_init(&sctx->b, &sscreen->b))
		goto fail;

	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);

	/* Use UVD hardware decoding when present, else the generic path. */
	if (sscreen->b.info.has_uvd) {
		sctx->b.b.create_video_codec = si_uvd_create_decoder;
		sctx->b.b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.b.create_video_codec = vl_create_decoder;
		sctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	sctx->b.rings.gfx.cs = sctx->b.ws->cs_create(sctx->b.ws, RING_GFX, NULL);
	sctx->b.rings.gfx.flush = si_flush_from_winsys;

	si_init_all_descriptors(sctx);

	/* Initialize cache_flush. */
	sctx->cache_flush = si_atom_cache_flush;
	sctx->atoms.cache_flush = &sctx->cache_flush;

	sctx->atoms.streamout_begin = &sctx->b.streamout.begin_atom;

	switch (sctx->b.chip_class) {
	case SI:
	case CIK:
		si_init_state_functions(sctx);
		si_init_config(sctx);
		break;
	default:
		R600_ERR("Unsupported chip class %d.\n", sctx->b.chip_class);
		goto fail;
	}

	sctx->b.ws->cs_set_flush_callback(sctx->b.rings.gfx.cs,
					  si_flush_from_winsys, sctx);

	sctx->blitter = util_blitter_create(&sctx->b.b);
	if (sctx->blitter == NULL)
		goto fail;

	/* Dummy FS so a shader is always bound (avoids NULL-shader draws). */
	sctx->dummy_pixel_shader =
		util_make_fragment_cloneinput_shader(&sctx->b.b, 0,
						     TGSI_SEMANTIC_GENERIC,
						     TGSI_INTERPOLATE_CONSTANT);
	sctx->b.b.bind_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);

	/* these must be last */
	si_begin_new_cs(sctx);
	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK) {
		sctx->null_const_buf.buffer =
			pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
					   PIPE_USAGE_DEFAULT, 16);
		sctx->null_const_buf.buffer_size =
			sctx->null_const_buf.buffer->width0;

		/* Bind the dummy buffer to every constant-buffer slot of
		 * every shader stage. */
		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < NUM_CONST_BUFFERS; i++) {
				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
							      &sctx->null_const_buf);
			}
		}

		/* Clear the NULL constant buffer, because loads should return zeros. */
		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
				     sctx->null_const_buf.buffer->width0, 0);
	}
	return &sctx->b.b;
fail:
	si_destroy_context(&sctx->b.b);
	return NULL;
}
while(num_tile_pipes--) { i = backend_map & item_mask; mask |= (1<<i); backend_map >>= item_width; } if (mask != 0) { ctx->backend_mask = mask; return; } } /* otherwise backup path for older kernels */ /* create buffer for event data */ buffer = (struct r600_resource*) pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, ctx->max_db*16); if (!buffer) goto err; va = r600_resource_va(&ctx->screen->screen, (void*)buffer); /* initialize buffer with zeroes */ results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE); if (results) { memset(results, 0, ctx->max_db * 4 * 4); ctx->ws->buffer_unmap(buffer->cs_buf); /* emit EVENT_WRITE for ZPASS_DONE */ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); cs->buf[cs->cdw++] = va;
/**
 * The init function of the MLAA filter.
 *
 * Allocates the constant buffer, bakes the max-search-steps value into the
 * blend2 fragment shader source, uploads the precomputed 165x165 area map
 * texture, and compiles the filter's shader set into ppq->shaders[n][1..4].
 *
 * \param ppq      post-processing queue owning the created resources
 * \param n        filter slot index in ppq->shaders
 * \param val      maximum MLAA search steps (embedded as a shader immediate)
 * \param iscolor  true for the color-based edge shader, false for depth-based
 * \return TRUE on success, FALSE on allocation failure (partially created
 *         resources are released via pp_jimenezmlaa_free()).
 */
static bool
pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned int n,
			unsigned int val, bool iscolor)
{
	struct pipe_box box;
	struct pipe_resource res;
	char *tmp_text = NULL;

	/* Scratch space for the shader text with the immediate spliced in. */
	tmp_text = CALLOC(sizeof(blend2fs_1) + sizeof(blend2fs_2) + IMM_SPACE,
			  sizeof(char));

	if (tmp_text == NULL) {
		pp_debug("Failed to allocate shader space\n");
		return FALSE;
	}

	ppq->constbuf = pipe_buffer_create(ppq->p->screen,
					   PIPE_BIND_CONSTANT_BUFFER,
					   PIPE_USAGE_DEFAULT,
					   sizeof(constants));
	if (ppq->constbuf == NULL) {
		pp_debug("Failed to allocate constant buffer\n");
		goto fail;
	}

	pp_debug("mlaa: using %u max search steps\n", val);

	/* Splice the search-step count between the two shader halves as a
	 * float immediate. */
	util_sprintf(tmp_text, "%s"
		     "IMM FLT32 { %.8f, 0.0000, 0.0000, 0.0000}\n"
		     "%s\n", blend2fs_1, (float) val, blend2fs_2);

	memset(&res, 0, sizeof(res));

	/* 165x165 R8G8 texture holding the precomputed MLAA area map. */
	res.target = PIPE_TEXTURE_2D;
	res.format = PIPE_FORMAT_R8G8_UNORM;
	res.width0 = res.height0 = 165;
	res.bind = PIPE_BIND_SAMPLER_VIEW;
	res.usage = PIPE_USAGE_DEFAULT;
	res.depth0 = res.array_size = res.nr_samples = 1;

	/* Only warns — creation below is still attempted.  NOTE(review):
	 * presumably acceptable because drivers that reach here support R8G8. */
	if (!ppq->p->screen->is_format_supported(ppq->p->screen, res.format,
						 res.target, 1, res.bind))
		pp_debug("Areamap format not supported\n");

	ppq->areamaptex = ppq->p->screen->resource_create(ppq->p->screen, &res);
	if (ppq->areamaptex == NULL) {
		pp_debug("Failed to allocate area map texture\n");
		goto fail;
	}

	u_box_2d(0, 0, 165, 165, &box);

	/* Upload the static area map (stride = 165 texels * 2 bytes). */
	ppq->p->pipe->transfer_inline_write(ppq->p->pipe, ppq->areamaptex, 0,
					    PIPE_TRANSFER_WRITE, &box, areamap,
					    165 * 2, sizeof(areamap));

	/* Compile the filter's shader pipeline; slot [0] is filled elsewhere. */
	ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, offsetvs, true,
					      "offsetvs");
	if (iscolor)
		ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, color1fs,
						      false, "color1fs");
	else
		ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, depth1fs,
						      false, "depth1fs");
	ppq->shaders[n][3] = pp_tgsi_to_state(ppq->p->pipe, tmp_text, false,
					      "blend2fs");
	ppq->shaders[n][4] = pp_tgsi_to_state(ppq->p->pipe, neigh3fs, false,
					      "neigh3fs");

	FREE(tmp_text);

	return TRUE;

fail:
	FREE(tmp_text);

	/*
	 * Call the common free function for destruction of partially
	 * initialized resources.
	 */
	pp_jimenezmlaa_free(ppq, n);

	return FALSE;
}
/**
 * Implementation of GL_OES_draw_texture: draw a screen-aligned textured
 * quad at window position (x, y) with the given size and depth.
 *
 * Packs position, optional color, and one texcoord set per enabled 2D
 * texture unit into a freshly created stream vertex buffer, then draws a
 * triangle fan through the CSO context with a matching pass-through vertex
 * shader.  Saved CSO state is restored before returning.
 */
static void
st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
	   GLfloat width, GLfloat height)
{
   struct st_context *st = ctx->st;
   struct pipe_context *pipe = st->pipe;
   struct cso_context *cso = ctx->st->cso_context;
   struct pipe_resource *vbuffer;
   struct pipe_transfer *vbuffer_transfer;
   GLuint i, numTexCoords, numAttribs;
   GLboolean emitColor;
   uint semantic_names[2 + MAX_TEXTURE_UNITS];
   uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
   struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS];
   GLbitfield inputs = VERT_BIT_POS;

   st_validate_state(st);

   /* determine if we need vertex color */
   if (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0)
      emitColor = GL_TRUE;
   else
      emitColor = GL_FALSE;

   /* determine how many enabled sets of texcoords */
   numTexCoords = 0;
   for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
      if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
         inputs |= VERT_BIT_TEX(i);
         numTexCoords++;
      }
   }

   /* total number of attributes per vertex */
   numAttribs = 1 + emitColor + numTexCoords;

   /* create the vertex buffer: 4 verts x numAttribs vec4s */
   vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
                                PIPE_USAGE_STREAM,
                                numAttribs * 4 * 4 * sizeof(GLfloat));

   /* load vertex buffer */
   {
      /* Writes one vec4 attribute ATTR of vertex VERT into the mapped
       * buffer; layout is interleaved: vert-major, attr-minor. */
#define SET_ATTRIB(VERT, ATTR, X, Y, Z, W) \
   do { \
      GLuint k = (((VERT) * numAttribs + (ATTR)) * 4); \
      assert(k < 4 * 4 * numAttribs); \
      vbuf[k + 0] = X; \
      vbuf[k + 1] = Y; \
      vbuf[k + 2] = Z; \
      vbuf[k + 3] = W; \
   } while (0)

      const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height;
      GLfloat *vbuf = (GLfloat *) pipe_buffer_map(pipe, vbuffer,
                                                  PIPE_TRANSFER_WRITE,
                                                  &vbuffer_transfer);
      GLuint attr;

      z = CLAMP(z, 0.0f, 1.0f);

      /* positions (in clip coords) */
      {
         const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
         const GLfloat fb_width = (GLfloat)fb->Width;
         const GLfloat fb_height = (GLfloat)fb->Height;

         /* window coords -> [-1, 1] clip coords */
         const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
         const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
         const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
         const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);

         SET_ATTRIB(0, 0, clip_x0, clip_y0, z, 1.0f);   /* lower left */
         SET_ATTRIB(1, 0, clip_x1, clip_y0, z, 1.0f);   /* lower right */
         SET_ATTRIB(2, 0, clip_x1, clip_y1, z, 1.0f);   /* upper right */
         SET_ATTRIB(3, 0, clip_x0, clip_y1, z, 1.0f);   /* upper left */

         semantic_names[0] = TGSI_SEMANTIC_POSITION;
         semantic_indexes[0] = 0;
      }

      /* colors (constant across the quad) */
      if (emitColor) {
         const GLfloat *c = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
         SET_ATTRIB(0, 1, c[0], c[1], c[2], c[3]);
         SET_ATTRIB(1, 1, c[0], c[1], c[2], c[3]);
         SET_ATTRIB(2, 1, c[0], c[1], c[2], c[3]);
         SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]);
         semantic_names[1] = TGSI_SEMANTIC_COLOR;
         semantic_indexes[1] = 0;
         attr = 2;
      }
      else {
         attr = 1;
      }

      /* texcoords, derived from the texture's OES crop rectangle */
      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
         if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
            struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current;
            struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
            const GLfloat wt = (GLfloat) img->Width;
            const GLfloat ht = (GLfloat) img->Height;
            const GLfloat s0 = obj->CropRect[0] / wt;
            const GLfloat t0 = obj->CropRect[1] / ht;
            const GLfloat s1 = (obj->CropRect[0] + obj->CropRect[2]) / wt;
            const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht;

            /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/
            SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f);  /* lower left */
            SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f);  /* lower right */
            SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f);  /* upper right */
            SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f);  /* upper left */

            semantic_names[attr] = TGSI_SEMANTIC_GENERIC;
            semantic_indexes[attr] = 0;
            attr++;
         }
      }

      pipe_buffer_unmap(pipe, vbuffer_transfer);

#undef SET_ATTRIB
   }

   /* Save CSO state we are about to clobber. */
   cso_save_viewport(cso);
   cso_save_vertex_shader(cso);
   cso_save_vertex_elements(cso);
   cso_save_vertex_buffers(cso);

   {
      /* Get/create a pass-through VS matching our attribute layout. */
      void *vs = lookup_shader(pipe, numAttribs,
                               semantic_names, semantic_indexes);
      cso_set_vertex_shader_handle(cso, vs);
   }

   /* All attributes are tightly packed vec4s in buffer 0. */
   for (i = 0; i < numAttribs; i++) {
      velements[i].src_offset = i * 4 * sizeof(float);
      velements[i].instance_divisor = 0;
      velements[i].vertex_buffer_index = 0;
      velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
   }
   cso_set_vertex_elements(cso, numAttribs, velements);

   /* viewport state: viewport matching window dims */
   {
      const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
      const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
      const GLfloat width = (GLfloat)fb->Width;
      const GLfloat height = (GLfloat)fb->Height;
      struct pipe_viewport_state vp;
      vp.scale[0] = 0.5f * width;
      vp.scale[1] = height * (invert ? -0.5f : 0.5f); /* flip Y for FBOs */
      vp.scale[2] = 1.0f;
      vp.scale[3] = 1.0f;
      vp.translate[0] = 0.5f * width;
      vp.translate[1] = 0.5f * height;
      vp.translate[2] = 0.0f;
      vp.translate[3] = 0.0f;
      cso_set_viewport(cso, &vp);
   }

   util_draw_vertex_buffer(pipe, cso, vbuffer,
                           0,  /* offset */
                           PIPE_PRIM_TRIANGLE_FAN,
                           4,  /* verts */
                           numAttribs); /* attribs/vert */

   /* Drop our reference; the driver keeps its own while the draw is live. */
   pipe_resource_reference(&vbuffer, NULL);

   /* restore state */
   cso_restore_viewport(cso);
   cso_restore_vertex_shader(cso);
   cso_restore_vertex_elements(cso);
   cso_restore_vertex_buffers(cso);
}
int main(int argc, char **argv) { struct fbdemos_scaffold *fbs = 0; fbdemo_init(&fbs); int width = fbs->width; int height = fbs->height; struct pipe_context *pipe = fbs->pipe; /* resources */ struct pipe_resource *rt_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_RENDER_TARGET, PIPE_FORMAT_B8G8R8X8_UNORM, width, height, 0); struct pipe_resource *z_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_RENDER_TARGET, PIPE_FORMAT_S8_UINT_Z24_UNORM, width, height, 0); struct pipe_resource *vtx_resource = pipe_buffer_create(fbs->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE, VERTEX_BUFFER_SIZE); struct pipe_resource *idx_resource = pipe_buffer_create(fbs->screen, PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_IMMUTABLE, VERTEX_BUFFER_SIZE); /* bind render target to framebuffer */ etna_fb_bind_resource(fbs, rt_resource); /* vertex / index buffer setup */ struct pipe_transfer *vtx_transfer = 0; float *vtx_logical = pipe_buffer_map(pipe, vtx_resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED, &vtx_transfer); assert(vtx_logical); for(int vert=0; vert<NUM_VERTICES; ++vert) { int dest_idx = vert * 3; for(int comp=0; comp<3; ++comp) vtx_logical[dest_idx+comp+0] = vVertices[vert*3 + comp]; /* 0 */ } pipe_buffer_unmap(pipe, vtx_transfer); struct pipe_transfer *idx_transfer = 0; void *idx_logical = pipe_buffer_map(pipe, idx_resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED, &idx_transfer); assert(idx_logical); memcpy(idx_logical, indices, sizeof(indices)); pipe_buffer_unmap(pipe, idx_transfer); struct pipe_vertex_buffer vertex_buf_desc = { .stride = (3)*4, .buffer_offset = 0, .buffer = vtx_resource, .user_buffer = 0 }; struct pipe_vertex_element pipe_vertex_elements[] = { { /* positions */ .src_offset = 0, .instance_divisor = 0, .vertex_buffer_index = 0, .src_format = PIPE_FORMAT_R32G32B32_FLOAT }, }; void *vertex_elements = pipe->create_vertex_elements_state(pipe, sizeof(pipe_vertex_elements)/sizeof(pipe_vertex_elements[0]), pipe_vertex_elements); struct 
pipe_index_buffer index_buf_desc = { .index_size = 1, .offset = 0, .buffer = idx_resource, .user_buffer = 0 }; /* compile gallium3d states */ void *blend = pipe->create_blend_state(pipe, &(struct pipe_blend_state) { .rt[0] = { .blend_enable = 0, .rgb_func = PIPE_BLEND_ADD, .rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA, .rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA, .alpha_func = PIPE_BLEND_ADD, .alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA, .alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA, .colormask = 0xf } }); void *sampler = pipe->create_sampler_state(pipe, &(struct pipe_sampler_state) { .wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE, .wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE, .wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE, .min_img_filter = PIPE_TEX_FILTER_LINEAR, .min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR, .mag_img_filter = PIPE_TEX_FILTER_LINEAR, .normalized_coords = 1, .lod_bias = 0.0f, .min_lod = 0.0f, .max_lod=1000.0f });
/**
 * Fill st->bitmap.vertices with a quad for drawing a bitmap at window
 * position (x, y) with the given size, then upload it into the bitmap
 * vertex buffer slot.
 *
 * Each vertex carries position (clip coords), color, and texcoords.
 * When `normalized` is false the texcoords run 0..width / 0..height
 * (rectangle-texture style) instead of 0..1.
 *
 * \return byte offset of the written slot within st->bitmap.vbuf; the
 *         slot counter is advanced as a side effect.
 */
static GLuint
setup_bitmap_vertex_data(struct st_context *st, bool normalized,
                         int x, int y, int width, int height,
                         float z, const float color[4])
{
   struct pipe_context *pipe = st->pipe;
   const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
   const GLfloat fb_width = (GLfloat)fb->Width;
   const GLfloat fb_height = (GLfloat)fb->Height;
   const GLfloat x0 = (GLfloat)x;
   const GLfloat x1 = (GLfloat)(x + width);
   const GLfloat y0 = (GLfloat)y;
   const GLfloat y1 = (GLfloat)(y + height);
   GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
   GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
   /* window coords -> [-1, 1] clip coords */
   const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
   const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
   const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
   const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
   const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */
   GLuint i;

   /* Un-normalized texcoords for rectangle-style sampling. */
   if(!normalized)
   {
      sRight = width;
      tBot = height;
   }

   /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
    * no_flush) updates to buffers where we know there is no conflict
    * with previous data.  Currently using max_slots > 1 will cause
    * synchronous rendering if the driver flushes its command buffers
    * between one bitmap and the next.  Our flush hook below isn't
    * sufficient to catch this as the driver doesn't tell us when it
    * flushes its own command buffers.  Until this gets fixed, pay the
    * price of allocating a new buffer for each bitmap cache-flush to
    * avoid synchronous rendering.
    */
   if (st->bitmap.vbuf_slot >= max_slots) {
      pipe_resource_reference(&st->bitmap.vbuf, NULL);
      st->bitmap.vbuf_slot = 0;
   }

   if (!st->bitmap.vbuf) {
      st->bitmap.vbuf = pipe_buffer_create(pipe->screen,
                                           PIPE_BIND_VERTEX_BUFFER,
                                           PIPE_USAGE_STREAM,
                                           max_slots *
                                           sizeof(st->bitmap.vertices));
   }

   /* Positions are in clip coords since we need to do clipping in case
    * the bitmap quad goes beyond the window bounds.
    * Vertex layout: [vert][attr][comp], attr 0 = pos, 1 = color, 2 = texcoord.
    */
   st->bitmap.vertices[0][0][0] = clip_x0;
   st->bitmap.vertices[0][0][1] = clip_y0;
   st->bitmap.vertices[0][2][0] = sLeft;
   st->bitmap.vertices[0][2][1] = tTop;

   st->bitmap.vertices[1][0][0] = clip_x1;
   st->bitmap.vertices[1][0][1] = clip_y0;
   st->bitmap.vertices[1][2][0] = sRight;
   st->bitmap.vertices[1][2][1] = tTop;

   st->bitmap.vertices[2][0][0] = clip_x1;
   st->bitmap.vertices[2][0][1] = clip_y1;
   st->bitmap.vertices[2][2][0] = sRight;
   st->bitmap.vertices[2][2][1] = tBot;

   st->bitmap.vertices[3][0][0] = clip_x0;
   st->bitmap.vertices[3][0][1] = clip_y1;
   st->bitmap.vertices[3][2][0] = sLeft;
   st->bitmap.vertices[3][2][1] = tBot;

   /* same for all verts: */
   for (i = 0; i < 4; i++) {
      st->bitmap.vertices[i][0][2] = z;
      st->bitmap.vertices[i][0][3] = 1.0;
      st->bitmap.vertices[i][1][0] = color[0];
      st->bitmap.vertices[i][1][1] = color[1];
      st->bitmap.vertices[i][1][2] = color[2];
      st->bitmap.vertices[i][1][3] = color[3];
      st->bitmap.vertices[i][2][2] = 0.0; /*R*/
      st->bitmap.vertices[i][2][3] = 1.0; /*Q*/
   }

   /* put vertex data into vbuf */
   pipe_buffer_write_nooverlap(st->pipe,
                               st->bitmap.vbuf,
                               st->bitmap.vbuf_slot
                               * sizeof(st->bitmap.vertices),
                               sizeof st->bitmap.vertices,
                               st->bitmap.vertices);

   return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices;
}
/**
 * Allocate storage for a buffer object and optionally fill it with data.
 * Whatever the buffer previously held is discarded.  A NULL `data` pointer
 * allocates the storage but performs no copy.
 * Called via ctx->Driver.BufferData().
 *
 * \return GL_TRUE on success, GL_FALSE if the allocation failed (out of
 *         memory); in that case the object's size is reset to zero.
 */
static GLboolean
st_bufferobj_data(struct gl_context *ctx,
		  GLenum target,
		  GLsizeiptrARB size,
		  const GLvoid * data,
		  GLenum usage,
		  struct gl_buffer_object *obj)
{
   struct st_context *st = st_context(ctx);
   struct pipe_context *pipe = st->pipe;
   struct st_buffer_object *stobj = st_buffer_object(obj);
   /* Defaults cover the "unknown target" / "unknown usage" cases so the
    * switches below only need to handle the recognized enums. */
   unsigned bind = 0;
   unsigned pipe_usage = PIPE_USAGE_DEFAULT;

   stobj->Base.Size = size;
   stobj->Base.Usage = usage;

   /* Translate the GL buffer target into gallium bind flags.  PBOs get
    * render-target/sampler-view binds so pixel transfers can be done by
    * drawing/sampling. */
   switch (target) {
   case GL_PIXEL_PACK_BUFFER_ARB:
   case GL_PIXEL_UNPACK_BUFFER_ARB:
      bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
      break;
   case GL_ARRAY_BUFFER_ARB:
      bind = PIPE_BIND_VERTEX_BUFFER;
      break;
   case GL_ELEMENT_ARRAY_BUFFER_ARB:
      bind = PIPE_BIND_INDEX_BUFFER;
      break;
   case GL_TEXTURE_BUFFER:
      bind = PIPE_BIND_SAMPLER_VIEW;
      break;
   case GL_TRANSFORM_FEEDBACK_BUFFER:
      bind = PIPE_BIND_STREAM_OUTPUT;
      break;
   case GL_UNIFORM_BUFFER:
      bind = PIPE_BIND_CONSTANT_BUFFER;
      break;
   default:
      break;
   }

   /* Translate the GL usage hint into a gallium usage; only the DRAW/
    * READ/COPY frequency matters for the pipe driver. */
   switch (usage) {
   case GL_STATIC_DRAW:
   case GL_STATIC_READ:
   case GL_STATIC_COPY:
      pipe_usage = PIPE_USAGE_STATIC;
      break;
   case GL_DYNAMIC_DRAW:
   case GL_DYNAMIC_READ:
   case GL_DYNAMIC_COPY:
      pipe_usage = PIPE_USAGE_DYNAMIC;
      break;
   case GL_STREAM_DRAW:
   case GL_STREAM_READ:
   case GL_STREAM_COPY:
      pipe_usage = PIPE_USAGE_STREAM;
      break;
   default:
      break;
   }

   /* Release the old storage before (possibly) allocating new storage. */
   pipe_resource_reference(&stobj->buffer, NULL);

   if (size == 0) {
      /* Zero-sized buffers need no pipe resource and always succeed. */
      return GL_TRUE;
   }

   stobj->buffer = pipe_buffer_create(pipe->screen, bind, pipe_usage, size);
   if (!stobj->buffer) {
      /* out of memory */
      stobj->Base.Size = 0;
      return GL_FALSE;
   }

   if (data)
      pipe_buffer_write(pipe, stobj->buffer, 0, size, data);

   return GL_TRUE;
}
/**
 * Create the compositor's static vertex-side buffers: a 4-vertex quad
 * covering the window, its matching texcoord buffer, and the vertex/
 * fragment shader constant buffers.  Also seeds the fragment constants
 * with the identity color-conversion matrix.
 *
 * NOTE(review): the pipe_buffer_create/pipe_buffer_map results are used
 * unchecked and the function always returns true — allocation failure
 * here would crash; confirm callers rely on that being impossible.
 */
static bool
init_buffers(struct vl_compositor *c)
{
	struct fragment_shader_consts fsc;

	assert(c);

	/*
	 * Create our vertex buffer and vertex buffer element
	 * VB contains 4 vertices that render a quad covering the entire window
	 * to display a rendered surface
	 * Quad is rendered as a tri strip
	 */
	c->vertex_bufs[0].stride = sizeof(struct vertex2f);
	c->vertex_bufs[0].max_index = 3;
	c->vertex_bufs[0].buffer_offset = 0;
	c->vertex_bufs[0].buffer = pipe_buffer_create
	(
		c->pipe->screen,
		1,
		PIPE_BUFFER_USAGE_VERTEX,
		sizeof(struct vertex2f) * 4
	);

	/* Upload the static quad positions. */
	memcpy
	(
		pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE),
		surface_verts,
		sizeof(struct vertex2f) * 4
	);

	pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);

	c->vertex_elems[0].src_offset = 0;
	c->vertex_elems[0].vertex_buffer_index = 0;
	c->vertex_elems[0].nr_components = 2;
	c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/*
	 * Create our texcoord buffer and texcoord buffer element
	 * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
	 */
	c->vertex_bufs[1].stride = sizeof(struct vertex2f);
	c->vertex_bufs[1].max_index = 3;
	c->vertex_bufs[1].buffer_offset = 0;
	c->vertex_bufs[1].buffer = pipe_buffer_create
	(
		c->pipe->screen,
		1,
		PIPE_BUFFER_USAGE_VERTEX,
		sizeof(struct vertex2f) * 4
	);

	/* Upload the static texcoords. */
	memcpy
	(
		pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE),
		surface_texcoords,
		sizeof(struct vertex2f) * 4
	);

	pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);

	c->vertex_elems[1].src_offset = 0;
	c->vertex_elems[1].vertex_buffer_index = 1;
	c->vertex_elems[1].nr_components = 2;
	c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/*
	 * Create our vertex shader's constant buffer
	 * Const buffer contains scaling and translation vectors
	 */
	c->vs_const_buf.buffer = pipe_buffer_create
	(
		c->pipe->screen,
		1,
		PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
		sizeof(struct vertex_shader_consts)
	);

	/*
	 * Create our fragment shader's constant buffer
	 * Const buffer contains the color conversion matrix and bias vectors
	 */
	c->fs_const_buf.buffer = pipe_buffer_create
	(
		c->pipe->screen,
		1,
		PIPE_BUFFER_USAGE_CONSTANT,
		sizeof(struct fragment_shader_consts)
	);

	/* Seed the fragment constants with an identity CSC matrix. */
	vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, fsc.matrix);

	vl_compositor_set_csc_matrix(c, fsc.matrix);

	return true;
}
/**
 * Map a buffer resource for CPU access.
 *
 * Fast path: when the caller discards the whole resource and the buffer is
 * busy on the GPU, the backing storage is replaced ("buffer renaming") so
 * the map never stalls; every binding point that referenced the old storage
 * is then re-dirtied so the new buffer gets rebound.  Otherwise the buffer
 * is mapped directly through the winsys (which may wait on the GPU unless
 * PIPE_TRANSFER_UNSYNCHRONIZED was given).
 *
 * \return pointer to the mapped range at transfer->box.x, or NULL on failure
 */
static void *r600_buffer_transfer_map(struct pipe_context *pipe,
				      struct pipe_transfer *transfer)
{
	struct r600_resource *rbuffer = r600_resource(transfer->resource);
	struct r600_context *rctx = (struct r600_context*)pipe;
	uint8_t *data;

	if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(transfer->usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			unsigned i, mask;

			/* Discard the buffer. */
			pb_reference(&rbuffer->buf, NULL);

			/* Create a new one in the same pipe_resource. */
			/* XXX We probably want a different alignment for buffers and textures. */
			r600_init_resource(rctx->screen, rbuffer, rbuffer->b.b.width0, 4096,
					   rbuffer->b.b.bind, rbuffer->b.b.usage);

			/* We changed the buffer, now we need to bind it where the old one was bound. */
			/* Vertex buffers. */
			mask = rctx->vertex_buffer_state.enabled_mask;
			while (mask) {
				i = u_bit_scan(&mask);
				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
					r600_vertex_buffers_dirty(rctx);
				}
			}
			/* Streamout buffers. */
			for (i = 0; i < rctx->num_so_targets; i++) {
				if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
					/* Streamout must be restarted so its internal
					 * offsets target the new storage. */
					r600_context_streamout_end(rctx);
					rctx->streamout_start = TRUE;
					rctx->streamout_append_bitmask = ~0;
				}
			}
			/* Constant buffers. */
			r600_set_constants_dirty_if_bound(rctx, rbuffer);
		}
	}
#if 0 /* this is broken (see Bug 53130) */
	else if ((transfer->usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 rctx->screen->has_streamout &&
		 /* The buffer range must be aligned to 4. */
		 transfer->box.x % 4 == 0 && transfer->box.width % 4 == 0) {
		assert(transfer->usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;

			rtransfer->staging = (struct r600_resource*)
				pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
						   PIPE_USAGE_STAGING, transfer->box.width);
			return rctx->ws->buffer_map(rtransfer->staging->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
		}
	}
#endif

	/* Direct map; honors the caller's usage flags (may stall). */
	data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, transfer->usage);
	if (!data)
		return NULL;

	return (uint8_t*)data + transfer->box.x;
}
/**
 * Begin a new gfx command stream after a flush.
 *
 * The GPU has no state carried over from the previous CS, so every state
 * atom, shader-resource mask, and cached draw-state value must be marked
 * dirty so the next draw re-emits a complete hardware state.  Missing a
 * re-dirty here manifests as intermittent corruption, so the list below is
 * intentionally exhaustive.
 */
void r600_begin_new_cs(struct r600_context *ctx)
{
	unsigned shader;

	if (ctx->is_debug) {
		uint32_t zero = 0;

		/* Create a buffer used for writing trace IDs and initialize it to 0. */
		assert(!ctx->trace_buf);
		ctx->trace_buf = (struct r600_resource*)
			pipe_buffer_create(ctx->b.b.screen, 0,
					   PIPE_USAGE_STAGING, 4);
		if (ctx->trace_buf)
			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
						    0, sizeof(zero), &zero);
		ctx->trace_id = 0;
	}

	if (ctx->trace_buf)
		eg_trace_emit(ctx);

	ctx->b.flags = 0;
	ctx->b.gtt = 0;
	ctx->b.vram = 0;

	/* Begin a new CS. */
	r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);

	/* Re-emit states. */
	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
	r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
	/* Image/buffer atoms only exist on evergreen and later. */
	if (ctx->b.chip_class >= EVERGREEN) {
		r600_mark_atom_dirty(ctx, &ctx->fragment_images.atom);
		r600_mark_atom_dirty(ctx, &ctx->fragment_buffers.atom);
		r600_mark_atom_dirty(ctx, &ctx->compute_images.atom);
		r600_mark_atom_dirty(ctx, &ctx->compute_buffers.atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_PS].atom);
	r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
	r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
	/* All viewports/scissors need re-emission. */
	ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	r600_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
	ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
	r600_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
	if (ctx->b.chip_class <= EVERGREEN) {
		r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
	r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_ES].atom);
	r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
	if (ctx->gs_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_GS].atom);
		r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
	}
	if (ctx->tes_shader) {
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_HS].atom);
		r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[EG_HW_STAGE_LS].atom);
	}
	r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_VS].atom);
	r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
	r600_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);

	/* CSO-backed atoms are only dirtied when a state object is bound. */
	if (ctx->blend_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
	if (ctx->dsa_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
	if (ctx->rasterizer_state.cso)
		r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);

	if (ctx->b.chip_class <= R700) {
		r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
	}

	ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
	r600_vertex_buffers_dirty(ctx);

	/* Re-emit shader resources. */
	for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
		struct r600_constbuf_state *constbuf = &ctx->constbuf_state[shader];
		struct r600_textures_info *samplers = &ctx->samplers[shader];

		constbuf->dirty_mask = constbuf->enabled_mask;
		samplers->views.dirty_mask = samplers->views.enabled_mask;
		samplers->states.dirty_mask = samplers->states.enabled_mask;

		r600_constant_buffers_dirty(ctx, constbuf);
		r600_sampler_views_dirty(ctx, &samplers->views);
		r600_sampler_states_dirty(ctx, &samplers->states);
	}

	for (shader = 0; shader < ARRAY_SIZE(ctx->scratch_buffers); shader++) {
		ctx->scratch_buffers[shader].dirty = true;
	}

	r600_postflush_resume_features(&ctx->b);

	/* Re-emit the draw state: invalidate cached values so the next draw
	 * re-programs them unconditionally. */
	ctx->last_primitive_type = -1;
	ctx->last_start_instance = -1;
	ctx->last_rast_prim = -1;
	ctx->current_rast_prim = -1;

	assert(!ctx->b.gfx.cs->prev_dw);
	/* Remember the baseline CS size so later code can tell whether any
	 * real commands were added beyond the start-of-CS preamble. */
	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
}
/**
 * Create a radeonsi pipe_context.
 *
 * Sets up everything a context needs before the first command stream is
 * begun: sub-allocators/uploaders, the winsys context and command streams,
 * the border-color table, driver vtable entries, per-chip workarounds, and
 * finally starts the first gfx CS.  Order matters throughout — see the
 * inline comments.
 *
 * \param screen  the si_screen this context belongs to
 * \param flags   PIPE_CONTEXT_* flags (DEBUG, COMPUTE_ONLY, LOSE_CONTEXT_ON_RESET, ...)
 * \return the new context, or NULL on failure (everything already created is
 *         torn down via si_destroy_context in the fail path)
 */
static struct pipe_context *si_create_context(struct pipe_screen *screen, unsigned flags)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	struct radeon_winsys *ws = sscreen->ws;
	int shader, i;
	bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;

	if (!sctx)
		return NULL;

	/* SI has no dedicated compute rings usable here, so it always gets a
	 * graphics-capable context even when COMPUTE_ONLY is requested. */
	sctx->has_graphics = sscreen->info.chip_class == SI ||
		!(flags & PIPE_CONTEXT_COMPUTE_ONLY);

	if (flags & PIPE_CONTEXT_DEBUG)
		sscreen->record_llvm_ir = true; /* racy but not critical */

	sctx->b.screen = screen; /* this must be set first */
	sctx->b.priv = NULL;
	sctx->b.destroy = si_destroy_context;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;

	/* Per-context transfer pools backed by the screen-wide slab parent. */
	slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
	slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers);

	sctx->ws = sscreen->ws;
	sctx->family = sscreen->info.family;
	sctx->chip_class = sscreen->info.chip_class;

	if (sscreen->info.has_gpu_reset_counter_query) {
		/* Snapshot the reset counter so later queries can detect a GPU reset. */
		sctx->gpu_reset_counter = sctx->ws->query_value(sctx->ws,
			RADEON_GPU_RESET_COUNTER);
	}

	if (sctx->chip_class == CIK || sctx->chip_class == VI ||
	    sctx->chip_class == GFX9) {
		/* Scratch buffer for an EOP hardware bug workaround on these chips;
		 * sized per render backend. */
		sctx->eop_bug_scratch = si_resource(
			pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
				16 * sscreen->info.num_render_backends));
		if (!sctx->eop_bug_scratch)
			goto fail;
	}

	/* Initialize context allocators. */
	sctx->allocator_zeroed_memory = u_suballocator_create(&sctx->b, 128 * 1024,
		0, PIPE_USAGE_DEFAULT,
		SI_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_CLEAR, false);
	if (!sctx->allocator_zeroed_memory)
		goto fail;

	sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024, 0,
		PIPE_USAGE_STREAM, SI_RESOURCE_FLAG_READ_ONLY);
	if (!sctx->b.stream_uploader)
		goto fail;

	sctx->cached_gtt_allocator = u_upload_create(&sctx->b, 16 * 1024, 0,
		PIPE_USAGE_STAGING, 0);
	if (!sctx->cached_gtt_allocator)
		goto fail;

	sctx->ctx = sctx->ws->ctx_create(sctx->ws);
	if (!sctx->ctx)
		goto fail;

	if (sscreen->info.num_sdma_rings &&
	    !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
		/* Optional async DMA command stream; absence is tolerated below. */
		sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
			(void*)si_flush_dma_cs, sctx, stop_exec_on_failure);
	}

	/* Uploading constants via SDMA needs dedicated VRAM and a DMA CS. */
	bool use_sdma_upload = sscreen->info.has_dedicated_vram && sctx->dma_cs;
	sctx->b.const_uploader = u_upload_create(&sctx->b, 256 * 1024, 0,
		PIPE_USAGE_DEFAULT,
		SI_RESOURCE_FLAG_32BIT |
		(use_sdma_upload ? SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA :
				   (sscreen->cpdma_prefetch_writes_memory ? 0 :
							SI_RESOURCE_FLAG_READ_ONLY)));
	if (!sctx->b.const_uploader)
		goto fail;

	if (use_sdma_upload)
		u_upload_enable_flush_explicit(sctx->b.const_uploader);

	sctx->gfx_cs = ws->cs_create(sctx->ctx,
		sctx->has_graphics ? RING_GFX : RING_COMPUTE,
		(void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);

	/* Border colors. */
	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
		sizeof(*sctx->border_color_table));
	if (!sctx->border_color_table)
		goto fail;

	sctx->border_color_buffer = si_resource(
		pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
			SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table)));
	if (!sctx->border_color_buffer)
		goto fail;

	/* Persistently mapped; entries are appended as samplers need them. */
	sctx->border_color_map = ws->buffer_map(sctx->border_color_buffer->buf,
		NULL, PIPE_TRANSFER_WRITE);
	if (!sctx->border_color_map)
		goto fail;

	/* Initialize context functions used by graphics and compute. */
	sctx->b.emit_string_marker = si_emit_string_marker;
	sctx->b.set_debug_callback = si_set_debug_callback;
	sctx->b.set_log_context = si_set_log_context;
	sctx->b.set_context_param = si_set_context_param;
	sctx->b.get_device_reset_status = si_get_reset_status;
	sctx->b.set_device_reset_callback = si_set_device_reset_callback;

	si_init_all_descriptors(sctx);
	si_init_buffer_functions(sctx);
	si_init_clear_functions(sctx);
	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_compute_blit_functions(sctx);
	si_init_debug_functions(sctx);
	si_init_fence_functions(sctx);
	si_init_state_compute_functions(sctx);

	if (sscreen->debug_flags & DBG(FORCE_DMA))
		sctx->b.resource_copy_region = sctx->dma_copy;

	/* Initialize graphics-only context functions. */
	if (sctx->has_graphics) {
		si_init_context_texture_functions(sctx);
		si_init_query_functions(sctx);
		si_init_msaa_functions(sctx);
		si_init_shader_functions(sctx);
		si_init_state_functions(sctx);
		si_init_streamout_functions(sctx);
		si_init_viewport_functions(sctx);

		sctx->blitter = util_blitter_create(&sctx->b);
		if (sctx->blitter == NULL)
			goto fail;
		sctx->blitter->skip_viewport_restore = true;

		si_init_draw_functions(sctx);
	}

	/* Initialize SDMA functions. */
	if (sctx->chip_class >= CIK)
		cik_init_sdma_functions(sctx);
	else
		si_init_dma_functions(sctx);

	sctx->sample_mask = 0xffff;

	/* Initialize multimedia functions. */
	if (sscreen->info.has_hw_decode) {
		sctx->b.create_video_codec = si_uvd_create_decoder;
		sctx->b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.create_video_codec = vl_create_decoder;
		sctx->b.create_video_buffer = vl_video_buffer_create;
	}

	if (sctx->chip_class >= GFX9) {
		/* 4-byte scratch dword the CP spins on for memory waits. */
		sctx->wait_mem_scratch = si_resource(
			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
		if (!sctx->wait_mem_scratch)
			goto fail;

		/* Initialize the memory. */
		si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
			V_370_MEM, V_370_ME, &sctx->wait_mem_number);
	}

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
	 * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
	if (sctx->chip_class == CIK) {
		sctx->null_const_buf.buffer = pipe_aligned_buffer_create(screen,
			SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, 16,
			sctx->screen->info.tcc_cache_line_size);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		/* Bind the dummy buffer to every constant-buffer slot of every
		 * shader stage this context exposes. */
		unsigned start_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;
		for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.set_constant_buffer(&sctx->b, shader, i,
					&sctx->null_const_buf);
			}
		}

		si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
			&sctx->null_const_buf);
		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
			&sctx->null_const_buf);
		si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES,
			&sctx->null_const_buf);
		si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE,
			&sctx->null_const_buf);
		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS,
			&sctx->null_const_buf);
	}

	uint64_t max_threads_per_block;
	screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
		PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, &max_threads_per_block);

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * sctx->scratch_waves must be >= the maximum posible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	sctx->scratch_waves = MAX2(32 * sscreen->info.num_good_compute_units,
		max_threads_per_block / 64);

	si_init_compiler(sscreen, &sctx->compiler);

	/* Bindless handles. */
	sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
		_mesa_key_pointer_equal);
	sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
		_mesa_key_pointer_equal);

	util_dynarray_init(&sctx->resident_tex_handles, NULL);
	util_dynarray_init(&sctx->resident_img_handles, NULL);
	util_dynarray_init(&sctx->resident_tex_needs_color_decompress, NULL);
	util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL);
	util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL);

	sctx->sample_pos_buffer = pipe_buffer_create(sctx->b.screen, 0,
		PIPE_USAGE_DEFAULT, sizeof(sctx->sample_positions));
	pipe_buffer_write(&sctx->b, sctx->sample_pos_buffer, 0,
		sizeof(sctx->sample_positions), &sctx->sample_positions);

	/* this must be last */
	si_begin_new_gfx_cs(sctx);

	if (sctx->chip_class == CIK) {
		/* Clear the NULL constant buffer, because loads should return zeros.
		 * Note that this forces CP DMA to be used, because clover deadlocks
		 * for some reason when the compute codepath is used.
		 */
		uint32_t clear_value = 0;
		si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
			sctx->null_const_buf.buffer->width0, &clear_value, 4,
			SI_COHERENCY_SHADER, true);
	}
	return &sctx->b;
fail:
	fprintf(stderr, "radeonsi: Failed to create a context.\n");
	si_destroy_context(&sctx->b);
	return NULL;
}
/**
 * Flush the accumulated dirty bits of the OpenVG context into the
 * underlying gallium/CSO state.
 *
 * Each dirty flag is handled independently; DEPTH_STENCIL_DIRTY must be
 * processed last (see comment below).  Clears ctx->state.dirty when done.
 */
void vg_validate_state(struct vg_context *ctx)
{
   if ((ctx->state.dirty & BLEND_DIRTY)) {
      struct pipe_blend_state *blend = &ctx->state.g3d.blend;
      memset(blend, 0, sizeof(struct pipe_blend_state));
      blend->rt[0].blend_enable = 1;
      blend->rt[0].colormask = PIPE_MASK_RGBA;

      /* Map the VG blend mode onto gallium blend factors.  Modes that the
       * hardware blender cannot express (SRC, MULTIPLY, SCREEN, DARKEN,
       * LIGHTEN) fall back to plain source writes with blending disabled;
       * presumably those are implemented in the shader — TODO confirm. */
      switch (ctx->state.vg.blend_mode) {
      case VG_BLEND_SRC:
         blend->rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_ONE;
         blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
         blend->rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_ZERO;
         blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
         blend->rt[0].blend_enable = 0;
         break;
      case VG_BLEND_SRC_OVER:
         blend->rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_SRC_ALPHA;
         blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
         blend->rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
         blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
         break;
      case VG_BLEND_DST_OVER:
         blend->rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_INV_DST_ALPHA;
         blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_INV_DST_ALPHA;
         blend->rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_DST_ALPHA;
         blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_DST_ALPHA;
         break;
      case VG_BLEND_SRC_IN:
         blend->rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_DST_ALPHA;
         blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_DST_ALPHA;
         blend->rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_ZERO;
         blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
         break;
      case VG_BLEND_DST_IN:
         blend->rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_ZERO;
         blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ZERO;
         blend->rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_SRC_ALPHA;
         blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
         break;
      case VG_BLEND_MULTIPLY:
      case VG_BLEND_SCREEN:
      case VG_BLEND_DARKEN:
      case VG_BLEND_LIGHTEN:
         blend->rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_ONE;
         blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
         blend->rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_ZERO;
         blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
         blend->rt[0].blend_enable = 0;
         break;
      case VG_BLEND_ADDITIVE:
         blend->rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_ONE;
         blend->rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
         blend->rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_ONE;
         blend->rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
         break;
      default:
         assert(!"not implemented blend mode");
      }
      cso_set_blend(ctx->cso_context, &ctx->state.g3d.blend);
   }
   if ((ctx->state.dirty & RASTERIZER_DIRTY)) {
      struct pipe_rasterizer_state *raster = &ctx->state.g3d.rasterizer;
      memset(raster, 0, sizeof(struct pipe_rasterizer_state));
      raster->gl_rasterization_rules = 1;
      cso_set_rasterizer(ctx->cso_context, &ctx->state.g3d.rasterizer);
   }
   if ((ctx->state.dirty & VIEWPORT_DIRTY)) {
      struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb;
      const VGint param_bytes = 8 * sizeof(VGfloat);
      /* Scale/bias constants mapping framebuffer pixels to clip space. */
      VGfloat vs_consts[8] = {
         2.f/fb->width, 2.f/fb->height, 1, 1,
         -1, -1, 0, 0
      };
      struct pipe_buffer **cbuf = &ctx->vs_const_buffer;

      vg_set_viewport(ctx, VEGA_Y0_BOTTOM);

      /* Recreate the VS constant buffer with the new viewport constants. */
      pipe_buffer_reference(cbuf, NULL);
      *cbuf = pipe_buffer_create(ctx->pipe->screen, 16,
                                 PIPE_BUFFER_USAGE_CONSTANT,
                                 param_bytes);

      if (*cbuf) {
         st_no_flush_pipe_buffer_write(ctx, *cbuf,
                                       0, param_bytes, vs_consts);
      }
      ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, *cbuf);
   }
   if ((ctx->state.dirty & VS_DIRTY)) {
      cso_set_vertex_shader_handle(ctx->cso_context, vg_plain_vs(ctx));
   }
   /* must be last because it renders to the depth buffer*/
   if ((ctx->state.dirty & DEPTH_STENCIL_DIRTY)) {
      update_clip_state(ctx);
      cso_set_depth_stencil_alpha(ctx->cso_context, &ctx->state.g3d.dsa);
   }

   /* These are cheap and applied unconditionally rather than tracked. */
   shader_set_masking(ctx->shader, ctx->state.vg.masking);
   shader_set_image_mode(ctx->shader, ctx->state.vg.image_mode);

   ctx->state.dirty = NONE_DIRTY;
}
/** Initialize the internal details */ struct program * pp_init_prog(struct pp_queue_t *ppq, struct pipe_screen *pscreen) { struct program *p; pp_debug("Initializing program\n"); if (!pscreen) return NULL; p = CALLOC(1, sizeof(struct program)); if (!p) return NULL; p->screen = pscreen; p->pipe = pscreen->context_create(pscreen, NULL); p->cso = cso_create_context(p->pipe); { static const float verts[4][2][4] = { { {1.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f, 0.0f, 1.0f} }, { {-1.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 1.0f, 0.0f, 1.0f} }, { {-1.0f, -1.0f, 0.0f, 1.0f}, {0.0f, 0.0f, 0.0f, 1.0f} }, { {1.0f, -1.0f, 0.0f, 1.0f}, {1.0f, 0.0f, 0.0f, 1.0f} } }; p->vbuf = pipe_buffer_create(pscreen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STATIC, sizeof(verts)); pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(verts), verts); } p->blend.rt[0].colormask = PIPE_MASK_RGBA; p->blend.rt[0].rgb_src_factor = p->blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; p->blend.rt[0].rgb_dst_factor = p->blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA; p->rasterizer.cull_face = PIPE_FACE_NONE; p->rasterizer.gl_rasterization_rules = 1; p->sampler.wrap_s = p->sampler.wrap_t = p->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; p->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; p->sampler.min_img_filter = p->sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; p->sampler.normalized_coords = 1; p->sampler_point.wrap_s = p->sampler_point.wrap_t = p->sampler_point.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; p->sampler_point.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; p->sampler_point.min_img_filter = p->sampler_point.mag_img_filter = PIPE_TEX_FILTER_NEAREST; p->sampler_point.normalized_coords = 1; p->velem[0].src_offset = 0; p->velem[0].instance_divisor = 0; p->velem[0].vertex_buffer_index = 0; p->velem[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; p->velem[1].src_offset = 1 * 4 * sizeof(float); p->velem[1].instance_divisor = 0; p->velem[1].vertex_buffer_index = 0; p->velem[1].src_format = 
PIPE_FORMAT_R32G32B32A32_FLOAT; if (!p->screen->is_format_supported(p->screen, PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_BUFFER, 1, PIPE_BIND_VERTEX_BUFFER)) pp_debug("Vertex buf format fail\n"); { const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC }; const uint semantic_indexes[] = { 0, 0 }; p->passvs = util_make_vertex_passthrough_shader(p->pipe, 2, semantic_names, semantic_indexes); } p->framebuffer.nr_cbufs = 1; p->surf.usage = PIPE_BIND_RENDER_TARGET; p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM; p->pipe->set_sample_mask(p->pipe, ~0); return p; }
void ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_context *ctx, const struct pipe_draw_info *info) { debug_assert(v->type == MESA_SHADER_VERTEX); emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX); /* emit driver params every time: */ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ if (info) { const struct ir3_const_state *const_state = &v->shader->const_state; uint32_t offset = const_state->offsets.driver_param; if (v->constlen > offset) { uint32_t vertex_params[IR3_DP_VS_COUNT] = { [IR3_DP_VTXID_BASE] = info->index_size ? info->index_bias : info->start, [IR3_DP_VTXCNT_MAX] = max_tf_vtx(ctx, v), }; /* if no user-clip-planes, we don't need to emit the * entire thing: */ uint32_t vertex_params_size = 4; if (v->key.ucp_enables) { struct pipe_clip_state *ucp = &ctx->ucp; unsigned pos = IR3_DP_UCP0_X; for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) { for (unsigned j = 0; j < 4; j++) { vertex_params[pos] = fui(ucp->ucp[i][j]); pos++; } } vertex_params_size = ARRAY_SIZE(vertex_params); } ring_wfi(ctx->batch, ring); bool needs_vtxid_base = ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != regid(63, 0); /* for indirect draw, we need to copy VTXID_BASE from * indirect-draw parameters buffer.. which is annoying * and means we can't easily emit these consts in cmd * stream so need to copy them to bo. 
*/ if (info->indirect && needs_vtxid_base) { struct pipe_draw_indirect_info *indirect = info->indirect; struct pipe_resource *vertex_params_rsc = pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM, vertex_params_size * 4); unsigned src_off = info->indirect->offset;; void *ptr; ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo); memcpy(ptr, vertex_params, vertex_params_size * 4); if (info->index_size) { /* indexed draw, index_bias is 4th field: */ src_off += 3 * 4; } else { /* non-indexed draw, start is 3rd field: */ src_off += 2 * 4; } /* copy index_bias or start from draw params: */ ctx->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer, src_off, 1); ctx->emit_const(ring, MESA_SHADER_VERTEX, offset * 4, 0, vertex_params_size, NULL, vertex_params_rsc); pipe_resource_reference(&vertex_params_rsc, NULL); } else { ctx->emit_const(ring, MESA_SHADER_VERTEX, offset * 4, 0, vertex_params_size, vertex_params, NULL); } /* if needed, emit stream-out buffer addresses: */ if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) { emit_tfbos(ctx, v, ring); } } }
/* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit
 * kernel parameters there are implicit parameters that need to be stored
 * in the vertex buffer as well. Here is how these parameters are organized in
 * the buffer:
 *
 * DWORDS 0-2: Number of work groups in each dimension (x,y,z)
 * DWORDS 3-5: Number of global work items in each dimension (x,y,z)
 * DWORDS 6-8: Number of work items within each work group in each dimension
 *             (x,y,z)
 * DWORDS 9+ : Kernel parameters
 */
void evergreen_compute_upload_input(
	struct pipe_context *ctx_,
	const uint *block_layout,
	const uint *grid_layout,
	const void *input)
{
	struct r600_context *ctx = (struct r600_context *)ctx_;
	struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
	unsigned i;
	/* We need to reserve 9 dwords (36 bytes) for implicit kernel
	 * parameters.
	 */
	unsigned input_size = shader->input_size + 36;
	uint32_t * num_work_groups_start;
	uint32_t * global_size_start;
	uint32_t * local_size_start;
	uint32_t * kernel_parameters_start;
	struct pipe_box box;
	struct pipe_transfer *transfer = NULL;

	/* Nothing to upload for kernels without explicit parameters. */
	if (shader->input_size == 0) {
		return;
	}

	if (!shader->kernel_param) {
		/* Add space for the grid dimensions */
		shader->kernel_param = (struct r600_resource *)
			pipe_buffer_create(ctx_->screen, PIPE_BIND_CUSTOM,
					PIPE_USAGE_IMMUTABLE, input_size);
	}

	u_box_1d(0, input_size, &box);
	num_work_groups_start = ctx_->transfer_map(ctx_,
			(struct pipe_resource*)shader->kernel_param,
			0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
			&box, &transfer);
	/* Carve the mapping into the four regions described in the header
	 * comment; each pointer advances 3 dwords (sizeof(uint)/4 == 1 when
	 * uint is 4 bytes, so these expressions evaluate to 3). */
	global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
	local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
	kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);

	/* DWORDS 0-2: number of work groups per dimension (grid dimensions) */
	memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint));

	/* DWORDS 3-5: global work size = work groups x work-group size */
	for (i = 0; i < 3; i++) {
		global_size_start[i] = grid_layout[i] * block_layout[i];
	}

	/* DWORDS 6-8: work items per work group (local dimensions) */
	memcpy(local_size_start, block_layout, 3 * sizeof(uint));

	/* DWORDS 9+: the explicit kernel inputs */
	memcpy(kernel_parameters_start, input, shader->input_size);

	for (i = 0; i < (input_size / 4); i++) {
		COMPUTE_DBG(ctx->screen, "input %i : %u\n", i,
			((unsigned*)num_work_groups_start)[i]);
	}

	ctx_->transfer_unmap(ctx_, transfer);

	/* ID=0 is reserved for the parameters */
	evergreen_cs_set_constant_buffer(ctx, 0, 0, input_size,
			(struct pipe_resource*)shader->kernel_param);
}
/**
 * Wrap an existing pipe_context in a ddebug context.
 *
 * Every driver entry point that dd implements is installed via CTX_INIT
 * (which presumably forwards to the wrapped pipe when the driver provides
 * the hook — confirm against the CTX_INIT macro definition).  In
 * DD_DETECT_HANGS_PIPELINED mode a small persistent-mapped fence buffer and
 * a watchdog thread are additionally created.
 *
 * \param dscreen  the ddebug screen owning this context
 * \param pipe     the real driver context being wrapped; destroyed on failure
 * \return the wrapper context, or NULL on failure
 */
struct pipe_context *
dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
{
   struct dd_context *dctx;

   if (!pipe)
      return NULL;

   dctx = CALLOC_STRUCT(dd_context);
   if (!dctx)
      goto fail;

   dctx->pipe = pipe;
   dctx->base.priv = pipe->priv; /* expose wrapped priv data */
   dctx->base.screen = &dscreen->base;
   dctx->base.stream_uploader = pipe->stream_uploader;
   dctx->base.const_uploader = pipe->const_uploader;

   dctx->base.destroy = dd_context_destroy;

   /* Queries and render condition. */
   CTX_INIT(render_condition);
   CTX_INIT(create_query);
   CTX_INIT(create_batch_query);
   CTX_INIT(destroy_query);
   CTX_INIT(begin_query);
   CTX_INIT(end_query);
   CTX_INIT(get_query_result);
   CTX_INIT(set_active_query_state);
   /* CSO create/bind/delete triples. */
   CTX_INIT(create_blend_state);
   CTX_INIT(bind_blend_state);
   CTX_INIT(delete_blend_state);
   CTX_INIT(create_sampler_state);
   CTX_INIT(bind_sampler_states);
   CTX_INIT(delete_sampler_state);
   CTX_INIT(create_rasterizer_state);
   CTX_INIT(bind_rasterizer_state);
   CTX_INIT(delete_rasterizer_state);
   CTX_INIT(create_depth_stencil_alpha_state);
   CTX_INIT(bind_depth_stencil_alpha_state);
   CTX_INIT(delete_depth_stencil_alpha_state);
   CTX_INIT(create_fs_state);
   CTX_INIT(bind_fs_state);
   CTX_INIT(delete_fs_state);
   CTX_INIT(create_vs_state);
   CTX_INIT(bind_vs_state);
   CTX_INIT(delete_vs_state);
   CTX_INIT(create_gs_state);
   CTX_INIT(bind_gs_state);
   CTX_INIT(delete_gs_state);
   CTX_INIT(create_tcs_state);
   CTX_INIT(bind_tcs_state);
   CTX_INIT(delete_tcs_state);
   CTX_INIT(create_tes_state);
   CTX_INIT(bind_tes_state);
   CTX_INIT(delete_tes_state);
   CTX_INIT(create_compute_state);
   CTX_INIT(bind_compute_state);
   CTX_INIT(delete_compute_state);
   CTX_INIT(create_vertex_elements_state);
   CTX_INIT(bind_vertex_elements_state);
   CTX_INIT(delete_vertex_elements_state);
   /* Parameter state. */
   CTX_INIT(set_blend_color);
   CTX_INIT(set_stencil_ref);
   CTX_INIT(set_sample_mask);
   CTX_INIT(set_min_samples);
   CTX_INIT(set_clip_state);
   CTX_INIT(set_constant_buffer);
   CTX_INIT(set_framebuffer_state);
   CTX_INIT(set_polygon_stipple);
   CTX_INIT(set_scissor_states);
   CTX_INIT(set_viewport_states);
   CTX_INIT(set_sampler_views);
   CTX_INIT(set_tess_state);
   CTX_INIT(set_shader_buffers);
   CTX_INIT(set_shader_images);
   CTX_INIT(set_vertex_buffers);
   /* Stream output, views, surfaces, transfers. */
   CTX_INIT(create_stream_output_target);
   CTX_INIT(stream_output_target_destroy);
   CTX_INIT(set_stream_output_targets);
   CTX_INIT(create_sampler_view);
   CTX_INIT(sampler_view_destroy);
   CTX_INIT(create_surface);
   CTX_INIT(surface_destroy);
   CTX_INIT(transfer_map);
   CTX_INIT(transfer_flush_region);
   CTX_INIT(transfer_unmap);
   CTX_INIT(buffer_subdata);
   CTX_INIT(texture_subdata);
   CTX_INIT(texture_barrier);
   CTX_INIT(memory_barrier);
   CTX_INIT(resource_commit);
   /* create_video_codec */
   /* create_video_buffer */
   /* set_compute_resources */
   /* set_global_binding */
   CTX_INIT(get_sample_position);
   CTX_INIT(invalidate_resource);
   CTX_INIT(get_device_reset_status);
   CTX_INIT(set_device_reset_callback);
   CTX_INIT(dump_debug_state);
   CTX_INIT(emit_string_marker);
   /* Bindless handles. */
   CTX_INIT(create_texture_handle);
   CTX_INIT(delete_texture_handle);
   CTX_INIT(make_texture_handle_resident);
   CTX_INIT(create_image_handle);
   CTX_INIT(delete_image_handle);
   CTX_INIT(make_image_handle_resident);

   dd_init_draw_functions(dctx);

   /* Route the wrapped driver's debug log into our own log context. */
   u_log_context_init(&dctx->log);
   if (pipe->set_log_context)
      pipe->set_log_context(pipe, &dctx->log);

   dctx->draw_state.sample_mask = ~0;

   if (dscreen->mode == DD_DETECT_HANGS_PIPELINED) {
      /* Persistent, coherent 4-byte fence the GPU writes and the watchdog
       * thread polls to detect pipelined hangs. */
      dctx->fence = pipe_buffer_create(dscreen->screen, PIPE_BIND_CUSTOM,
                                       PIPE_USAGE_STAGING, 4);
      if (!dctx->fence)
         goto fail;

      dctx->mapped_fence = pipe_buffer_map(pipe, dctx->fence,
                                           PIPE_TRANSFER_READ_WRITE |
                                           PIPE_TRANSFER_PERSISTENT |
                                           PIPE_TRANSFER_COHERENT,
                                           &dctx->fence_transfer);
      if (!dctx->mapped_fence)
         goto fail;

      *dctx->mapped_fence = 0;

      (void) mtx_init(&dctx->mutex, mtx_plain);
      dctx->thread = u_thread_create(dd_thread_pipelined_hang_detect, dctx);
      if (!dctx->thread) {
         mtx_destroy(&dctx->mutex);
         goto fail;
      }
   }

   return &dctx->base;

fail:
   /* Unwind partial construction; the wrapped context is destroyed too,
    * since the caller handed ownership to us. */
   if (dctx) {
      if (dctx->mapped_fence)
         pipe_transfer_unmap(pipe, dctx->fence_transfer);
      pipe_resource_reference(&dctx->fence, NULL);
      FREE(dctx);
   }
   pipe->destroy(pipe);
   return NULL;
}
/**
 * Allocate space for and store data in a buffer object.  Any data that was
 * previously stored in the buffer object is lost.  If data is NULL,
 * memory will be allocated, but no copy will occur.
 * Called via ctx->Driver.BufferData().
 * \return GL_TRUE for success, GL_FALSE if out of memory
 */
static GLboolean
st_bufferobj_data(struct gl_context *ctx,
		  GLenum target,
		  GLsizeiptrARB size,
		  const GLvoid * data,
		  GLenum usage,
		  struct gl_buffer_object *obj)
{
   struct st_context *st = st_context(ctx);
   struct pipe_context *pipe = st->pipe;
   struct st_buffer_object *st_obj = st_buffer_object(obj);
   unsigned bind, pipe_usage;

   /* Fast path: same size and usage as the existing buffer. */
   if (size && data && st_obj->buffer &&
       st_obj->Base.Size == size && st_obj->Base.Usage == usage) {
      /* Just discard the old contents and write new data.
       * This should be the same as creating a new buffer, but we avoid
       * a lot of validation in Mesa.
       */
      struct pipe_box box;
      u_box_1d(0, size, &box);
      pipe->transfer_inline_write(pipe, st_obj->buffer, 0,
                                  PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
                                  &box, data, 0, 0);
      return GL_TRUE;
   }

   st_obj->Base.Size = size;
   st_obj->Base.Usage = usage;

   /* Map the GL buffer target to gallium bind flags. */
   switch (target) {
   case GL_PIXEL_PACK_BUFFER_ARB:
   case GL_PIXEL_UNPACK_BUFFER_ARB:
      bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
      break;
   case GL_ARRAY_BUFFER_ARB:
      bind = PIPE_BIND_VERTEX_BUFFER;
      break;
   case GL_ELEMENT_ARRAY_BUFFER_ARB:
      bind = PIPE_BIND_INDEX_BUFFER;
      break;
   case GL_TEXTURE_BUFFER:
      bind = PIPE_BIND_SAMPLER_VIEW;
      break;
   case GL_TRANSFORM_FEEDBACK_BUFFER:
      bind = PIPE_BIND_STREAM_OUTPUT;
      break;
   case GL_UNIFORM_BUFFER:
      bind = PIPE_BIND_CONSTANT_BUFFER;
      break;
   default:
      bind = 0;
   }

   /* Map the GL usage hint to a gallium usage; STATIC_* and anything
    * unrecognized use the default placement. */
   switch (usage) {
   case GL_STATIC_DRAW:
   case GL_STATIC_READ:
   case GL_STATIC_COPY:
   default:
      pipe_usage = PIPE_USAGE_DEFAULT;
      break;
   case GL_DYNAMIC_DRAW:
   case GL_DYNAMIC_READ:
   case GL_DYNAMIC_COPY:
      pipe_usage = PIPE_USAGE_DYNAMIC;
      break;
   case GL_STREAM_DRAW:
   case GL_STREAM_READ:
   case GL_STREAM_COPY:
      pipe_usage = PIPE_USAGE_STREAM;
      break;
   }

   /* Release the old pipe resource before allocating the new one. */
   pipe_resource_reference( &st_obj->buffer, NULL );

   if (ST_DEBUG & DEBUG_BUFFER) {
      debug_printf("Create buffer size %td bind 0x%x\n", size, bind);
   }

   if (size != 0) {
      st_obj->buffer = pipe_buffer_create(pipe->screen, bind,
                                          pipe_usage, size);

      if (!st_obj->buffer) {
         /* out of memory */
         st_obj->Base.Size = 0;
         return GL_FALSE;
      }

      if (data)
         pipe_buffer_write(pipe, st_obj->buffer, 0, size, data);
   }

   /* BufferData may change an array or uniform buffer, need to update it */
   st->dirty.st |= ST_NEW_VERTEX_ARRAYS | ST_NEW_UNIFORM_BUFFER;

   return GL_TRUE;
}
/**
 * Draw a screen-aligned quadrilateral.
 * Coords are clip coords with y=0=bottom.
 *
 * \param st     the state tracker context (owns the clear VBO/vertices)
 * \param x0,y0  lower-left corner in clip coords
 * \param x1,y1  upper-right corner in clip coords
 * \param z      depth value for all four vertices
 * \param color  RGBA color for all four vertices
 *
 * Fix: the pipe_buffer_create() result was unchecked; on allocation failure
 * a NULL resource reached pipe_buffer_write_nooverlap() and
 * util_draw_vertex_buffer().  We now silently skip the draw instead.
 */
static void
draw_quad(struct st_context *st,
          float x0, float y0, float x1, float y1, GLfloat z,
          const GLfloat color[4])
{
   struct pipe_context *pipe = st->pipe;

   /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
    * no_flush) updates to buffers where we know there is no conflict
    * with previous data.  Currently using max_slots > 1 will cause
    * synchronous rendering if the driver flushes its command buffers
    * between one bitmap and the next.  Our flush hook below isn't
    * sufficient to catch this as the driver doesn't tell us when it
    * flushes its own command buffers.  Until this gets fixed, pay the
    * price of allocating a new buffer for each bitmap cache-flush to
    * avoid synchronous rendering.
    */
   const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */
   GLuint i;

   if (st->clear.vbuf_slot >= max_slots) {
      pipe_resource_reference(&st->clear.vbuf, NULL);
      st->clear.vbuf_slot = 0;
   }

   if (!st->clear.vbuf) {
      st->clear.vbuf = pipe_buffer_create(pipe->screen,
                                          PIPE_BIND_VERTEX_BUFFER,
                                          PIPE_USAGE_STREAM,
                                          max_slots *
                                          sizeof(st->clear.vertices));
      /* Out of memory: skip the draw rather than dereference NULL below. */
      if (!st->clear.vbuf)
         return;
   }

   /* positions */
   st->clear.vertices[0][0][0] = x0;
   st->clear.vertices[0][0][1] = y0;

   st->clear.vertices[1][0][0] = x1;
   st->clear.vertices[1][0][1] = y0;

   st->clear.vertices[2][0][0] = x1;
   st->clear.vertices[2][0][1] = y1;

   st->clear.vertices[3][0][0] = x0;
   st->clear.vertices[3][0][1] = y1;

   /* same for all verts: */
   for (i = 0; i < 4; i++) {
      st->clear.vertices[i][0][2] = z;
      st->clear.vertices[i][0][3] = 1.0;
      st->clear.vertices[i][1][0] = color[0];
      st->clear.vertices[i][1][1] = color[1];
      st->clear.vertices[i][1][2] = color[2];
      st->clear.vertices[i][1][3] = color[3];
   }

   /* put vertex data into vbuf */
   pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf,
                               st->clear.vbuf_slot *
                               sizeof(st->clear.vertices),
                               sizeof(st->clear.vertices),
                               st->clear.vertices);

   /* draw */
   util_draw_vertex_buffer(pipe,
                           st->cso_context,
                           st->clear.vbuf,
                           st->clear.vbuf_slot *
                           sizeof(st->clear.vertices),
                           PIPE_PRIM_TRIANGLE_FAN,
                           4,  /* verts */
                           2); /* attribs/vert */

   /* Increment slot */
   st->clear.vbuf_slot++;
}
.writemask = 0, .func = PIPE_FUNC_LESS /* GL default */ }, .stencil[0] = { .enabled = 0 }, .stencil[1] = { .enabled = 0 }, .alpha = { .enabled = 0 } }); /* particles */ struct pipe_resource *vtx_resource = pipe_buffer_create(fbs->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE, VERTEX_BUFFER_SIZE); struct pipe_vertex_buffer vertex_buffer_desc = { .stride = PARTICLE_SIZE*4, .buffer_offset = 0, .buffer = vtx_resource, .user_buffer = 0 }; struct pipe_vertex_element pipe_vertex_elements[] = { { /* positions */ .src_offset = 0x0, .instance_divisor = 0, .vertex_buffer_index = 0, .src_format = PIPE_FORMAT_R32_FLOAT }, { /* normals */ .src_offset = 0x4,
int main(int argc, char **argv) { struct fbdemos_scaffold *fbs = 0; fbdemo_init(&fbs); int width = fbs->width; int height = fbs->height; struct pipe_context *pipe = fbs->pipe; dds_texture *dds = 0; if(argc<2 || !dds_load(argv[1], &dds)) { printf("Error loading texture\n"); exit(1); } uint32_t tex_format = 0; uint32_t tex_base_width = dds->slices[0][0].width; uint32_t tex_base_height = dds->slices[0][0].height; switch(dds->fmt) { case FMT_A8R8G8B8: tex_format = PIPE_FORMAT_B8G8R8A8_UNORM; break; case FMT_X8R8G8B8: tex_format = PIPE_FORMAT_B8G8R8X8_UNORM; break; case FMT_DXT1: tex_format = PIPE_FORMAT_DXT1_RGB; break; case FMT_DXT3: tex_format = PIPE_FORMAT_DXT3_RGBA; break; case FMT_DXT5: tex_format = PIPE_FORMAT_DXT5_RGBA; break; case FMT_ETC1: tex_format = PIPE_FORMAT_ETC1_RGB8; break; case FMT_A8: tex_format = PIPE_FORMAT_A8_UNORM; break; case FMT_L8: tex_format = PIPE_FORMAT_L8_UNORM; break; case FMT_A8L8: tex_format = PIPE_FORMAT_L8A8_UNORM; break; default: printf("Unknown texture format\n"); exit(1); } struct pipe_resource *tex_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_SAMPLER_VIEW, tex_format, tex_base_width, tex_base_height, dds->num_mipmaps - 1); printf("Loading compressed texture (format %i, %ix%i)\n", dds->fmt, tex_base_width, tex_base_height); for(int ix=0; ix<dds->num_mipmaps; ++ix) { printf("%08x: Uploading mipmap %i (%ix%i)\n", dds->slices[0][ix].offset, ix, dds->slices[0][ix].width, dds->slices[0][ix].height); etna_pipe_inline_write(pipe, tex_resource, 0, ix, dds->slices[0][ix].data, dds->slices[0][ix].size); } /* resources */ struct pipe_resource *rt_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_RENDER_TARGET, PIPE_FORMAT_B8G8R8X8_UNORM, width, height, 0); struct pipe_resource *z_resource = fbdemo_create_2d(fbs->screen, PIPE_BIND_RENDER_TARGET, PIPE_FORMAT_Z16_UNORM, width, height, 0); struct pipe_resource *vtx_resource = pipe_buffer_create(fbs->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE, VERTEX_BUFFER_SIZE); /* bind render 
target to framebuffer */ etna_fb_bind_resource(&fbs->fb, rt_resource); /* Phew, now we got all the memory we need. * Write interleaved attribute vertex stream. * Unlike the GL example we only do this once, not every time glDrawArrays is called, the same would be accomplished * from GL by using a vertex buffer object. */ struct pipe_transfer *vtx_transfer = 0; float *vtx_logical = pipe_buffer_map(pipe, vtx_resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED, &vtx_transfer); assert(vtx_logical); for(int vert=0; vert<NUM_VERTICES; ++vert) { int dest_idx = vert * (3 + 3 + 2); for(int comp=0; comp<3; ++comp) vtx_logical[dest_idx+comp+0] = vVertices[vert*3 + comp]; /* 0 */ for(int comp=0; comp<3; ++comp) vtx_logical[dest_idx+comp+3] = vNormals[vert*3 + comp]; /* 1 */ for(int comp=0; comp<2; ++comp) vtx_logical[dest_idx+comp+6] = vTexCoords[vert*2 + comp]; /* 2 */ } pipe_buffer_unmap(pipe, vtx_transfer); /* compile gallium3d states */ void *blend = NULL; if(tex_format == PIPE_FORMAT_A8_UNORM || tex_format == PIPE_FORMAT_L8A8_UNORM) /* if alpha texture, enable blending */ { blend = pipe->create_blend_state(pipe, &(struct pipe_blend_state) { .rt[0] = { .blend_enable = 1, .rgb_func = PIPE_BLEND_ADD, .rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA, .rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA, .alpha_func = PIPE_BLEND_ADD, .alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA, .alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA, .colormask = 0xf } });
/*
 * Common screen initialization shared by the r600 and radeonsi drivers:
 * queries winsys device info, builds the renderer string, fills in the
 * pipe_screen vtable, and sets up tiling / mutexes / optional CS tracing.
 *
 * NOTE(review): this function is truncated in this chunk -- it ends after
 * the DBG_INFO dump; the remaining initialization (and final return) is
 * not visible here.
 */
bool r600_common_screen_init(struct r600_common_screen *rscreen,
                             struct radeon_winsys *ws)
{
   char llvm_string[32] = {};

   /* Pull device capabilities from the winsys first; nearly everything
    * below derives from rscreen->info. */
   ws->query_info(ws, &rscreen->info);

#if HAVE_LLVM
   snprintf(llvm_string, sizeof(llvm_string),
            ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, HAVE_LLVM & 0xff,
            MESA_LLVM_VERSION_PATCH);
#endif

   snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
            "%s (DRM %i.%i.%i%s)", r600_get_chip_name(rscreen),
            rscreen->info.drm_major, rscreen->info.drm_minor,
            rscreen->info.drm_patchlevel, llvm_string);

   /* pipe_screen vtable */
   rscreen->b.get_name = r600_get_name;
   rscreen->b.get_vendor = r600_get_vendor;
   rscreen->b.get_device_vendor = r600_get_device_vendor;
   rscreen->b.get_compute_param = r600_get_compute_param;
   rscreen->b.get_paramf = r600_get_paramf;
   rscreen->b.get_timestamp = r600_get_timestamp;
   rscreen->b.fence_finish = r600_fence_finish;
   rscreen->b.fence_reference = r600_fence_reference;
   rscreen->b.resource_destroy = u_resource_destroy_vtbl;
   rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;

   /* Video decode hooks: real UVD entry points when the hardware block is
    * present, generic fallbacks otherwise. */
   if (rscreen->info.has_uvd) {
      rscreen->b.get_video_param = rvid_get_video_param;
      rscreen->b.is_video_format_supported = rvid_is_format_supported;
   } else {
      rscreen->b.get_video_param = r600_get_video_param;
      rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
   }

   r600_init_screen_texture_functions(rscreen);
   r600_init_screen_query_functions(rscreen);

   rscreen->ws = ws;
   rscreen->family = rscreen->info.family;
   rscreen->chip_class = rscreen->info.chip_class;
   rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);

   if (!r600_init_tiling(rscreen)) {
      return false;
   }

   util_format_s3tc_init();
   pipe_mutex_init(rscreen->aux_context_lock);
   pipe_mutex_init(rscreen->gpu_load_mutex);

   /* CS tracing needs DRM 2.28+ (or DRM 3) and the R600_DEBUG=tracecs flag;
    * the trace buffer is mapped unsynchronized so it can be inspected after
    * a hang. */
   if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) ||
        rscreen->info.drm_major == 3) &&
       (rscreen->debug_flags & DBG_TRACE_CS)) {
      rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
                                                                    PIPE_BIND_CUSTOM,
                                                                    PIPE_USAGE_STAGING,
                                                                    4096);
      if (rscreen->trace_bo) {
         rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->buf, NULL,
                                                      PIPE_TRANSFER_UNSYNCHRONIZED);
      }
   }

   /* R600_DEBUG=info: dump the queried device capabilities. */
   if (rscreen->debug_flags & DBG_INFO) {
      printf("pci_id = 0x%x\n", rscreen->info.pci_id);
      printf("family = %i\n", rscreen->info.family);
      printf("chip_class = %i\n", rscreen->info.chip_class);
      printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20));
      printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20));
      printf("max_sclk = %i\n", rscreen->info.max_sclk);
      printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
      printf("max_se = %i\n", rscreen->info.max_se);
      printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
      printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
             rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
      printf("has_uvd = %i\n", rscreen->info.has_uvd);
      printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
      printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends);
      printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq);
      printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config);
      printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes);
      printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes);
      printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address);
      printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma);
      printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map);
      printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid);
      printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid);
      printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid);
   }
/*
 * Prepare a freshly started command stream (CS): set up the optional debug
 * trace buffer, flush read caches, and mark every piece of tracked state
 * dirty so it is re-emitted before the first draw of the new CS.
 *
 * The order here matters: the init_config PM4 state must be emitted before
 * any other state, and the various "last_*" shadows are invalidated at the
 * end so the next draw re-emits them.
 */
void si_begin_new_cs(struct si_context *ctx)
{
	if (ctx->is_debug) {
		uint32_t zero = 0;

		/* Create a buffer used for writing trace IDs and initialize it to 0. */
		assert(!ctx->trace_buf);
		ctx->trace_buf = (struct r600_resource*)
				 pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM,
						    PIPE_USAGE_STAGING, 4);
		if (ctx->trace_buf)
			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
						    0, sizeof(zero), &zero);
		ctx->trace_id = 0;
	}

	if (ctx->trace_buf)
		si_trace_emit(ctx);

	/* Flush read caches at the beginning of CS. */
	ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
			SI_CONTEXT_INV_TC_L1 |
			SI_CONTEXT_INV_TC_L2 |
			SI_CONTEXT_INV_KCACHE |
			SI_CONTEXT_INV_ICACHE;

	/* set all valid group as dirty so they get reemited on
	 * next draw command
	 */
	si_pm4_reset_emitted(ctx);

	/* The CS initialization should be emitted before everything else. */
	si_pm4_emit(ctx, ctx->init_config);

	/* All color buffers (up to 8) and the depth buffer must be rebound. */
	ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
	ctx->framebuffer.dirty_zsbuf = true;
	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);

	si_mark_atom_dirty(ctx, &ctx->clip_regs);
	si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
	si_mark_atom_dirty(ctx, &ctx->msaa_config);
	si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
	si_mark_atom_dirty(ctx, &ctx->cb_target_mask);
	si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
	si_mark_atom_dirty(ctx, &ctx->db_render_state);
	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
	si_mark_atom_dirty(ctx, &ctx->spi_map);
	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
	si_all_descriptors_begin_new_cs(ctx);

	/* All viewports/scissors must be re-emitted too. */
	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
	ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
	si_mark_atom_dirty(ctx, &ctx->viewports.atom);

	r600_postflush_resume_features(&ctx->b);

	/* Remember the CS size after the fixed preamble so later code can tell
	 * how much was added by draws. */
	ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;

	/* Invalidate various draw states so that they are emitted before
	 * the first draw call.
	 */
	si_invalidate_draw_sh_constants(ctx);
	ctx->last_primitive_restart_en = -1;
	ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
	ctx->last_gs_out_prim = -1;
	ctx->last_prim = -1;
	ctx->last_multi_vgt_param = -1;
	ctx->last_ls_hs_config = -1;
	ctx->last_rast_prim = -1;
	ctx->last_sc_line_stipple = ~0;
	ctx->emit_scratch_reloc = true;
	ctx->last_ls = NULL;
	ctx->last_tcs = NULL;
	ctx->last_tes_sh_base = -1;
	ctx->last_num_tcs_input_cp = -1;
}
/*
 * Emit an indexed draw for the swtnl (vbuf) path: copy the 16-bit index
 * batch into a streaming hardware index buffer (growing/recycling it as
 * needed) and kick off the draw, retrying once after a flush if the
 * command buffer was full.
 */
static void
svga_vbuf_render_draw_elements(struct vbuf_render *render,
                               const ushort *indices,
                               uint nr_indices)
{
   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
   struct svga_context *svga = svga_render->svga;
   struct pipe_screen *screen = svga->pipe.screen;
   /* The vertex data for this batch starts vbuf_offset bytes into the
    * vertex buffer, while the vertex declaration was set up relative to
    * vdecl_offset; the difference, in whole vertices, biases the indices. */
   int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset)
              / svga_render->vertex_size;
   /* FIX: was 'boolean' -- svga_hwtnl_draw_range_elements() returns an
    * enum pipe_error, which is what the PIPE_OK comparisons below expect. */
   enum pipe_error ret;
   size_t size = 2 * nr_indices;   /* ushort (16-bit) indices */

   assert((svga_render->vbuf_offset - svga_render->vdecl_offset)
          % svga_render->vertex_size == 0);

   /* Recycle the index buffer if this batch won't fit behind the data
    * already streamed into it. */
   if (svga_render->ibuf_size < svga_render->ibuf_offset + size)
      pipe_resource_reference(&svga_render->ibuf, NULL);

   if (!svga_render->ibuf) {
      svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size);
      svga_render->ibuf = pipe_buffer_create(screen,
                                             PIPE_BIND_INDEX_BUFFER,
                                             PIPE_USAGE_STREAM,
                                             svga_render->ibuf_size);
      svga_render->ibuf_offset = 0;
   }

   pipe_buffer_write_nooverlap(&svga->pipe, svga_render->ibuf,
                               svga_render->ibuf_offset, 2 * nr_indices,
                               indices);

   /* off to hardware */
   svga_vbuf_submit_state(svga_render);

   /* Need to call update_state() again as the draw module may have
    * altered some of our state behind our backs.  Testcase:
    * redbook/polys.c
    */
   svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);

   ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
                                        svga_render->ibuf,
                                        2,       /* index size in bytes */
                                        bias,
                                        svga_render->min_index,
                                        svga_render->max_index,
                                        svga_render->prim,
                                        svga_render->ibuf_offset / 2,
                                        nr_indices);
   if (ret != PIPE_OK) {
      /* Likely out of command-buffer space: flush and retry once. */
      svga_context_flush(svga, NULL);
      ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
                                           svga_render->ibuf,
                                           2,
                                           bias,
                                           svga_render->min_index,
                                           svga_render->max_index,
                                           svga_render->prim,
                                           svga_render->ibuf_offset / 2,
                                           nr_indices);
      svga->swtnl.new_vbuf = TRUE;
      assert(ret == PIPE_OK);
   }

   svga_render->ibuf_offset += size;
}
/*
 * Map a buffer resource for CPU access, choosing the least-stalling path:
 *  1. Promote to UNSYNCHRONIZED when the mapped range was never written.
 *  2. Promote a whole-range DISCARD_RANGE to DISCARD_WHOLE_RESOURCE and try
 *     to reallocate the storage so no wait is needed.
 *  3. For a busy buffer with DISCARD_RANGE, stream the write through a
 *     temporary upload buffer (copied back later at unmap).
 *  4. For large reads from VRAM, DMA-copy into a GTT staging buffer first.
 *  5. Otherwise fall through to a plain (possibly waiting) map.
 * Returns a pointer to the mapped range, or NULL on failure.
 * Note: 'usage' flag mutations are order-dependent -- each branch feeds the
 * checks after it.
 */
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer)
{
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
	struct r600_resource *rbuffer = r600_resource(resource);
	uint8_t *data;

	assert(box->x + box->width <= resource->width0);

	/* See if the buffer range being mapped has never been initialized,
	 * in which case it can be mapped unsynchronized. */
	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
	    usage & PIPE_TRANSFER_WRITE &&
	    !rbuffer->is_shared &&
	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
	}

	/* If discarding the entire range, discard the whole resource instead. */
	if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
	    box->x == 0 && box->width == resource->width0) {
		usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
	}

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		if (r600_invalidate_buffer(rctx, rbuffer)) {
			/* At this point, the buffer is always idle. */
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
		} else {
			/* Fall back to a temporary buffer. */
			usage |= PIPE_TRANSFER_DISCARD_RANGE;
		}
	}

	if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
	    !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
		       PIPE_TRANSFER_PERSISTENT)) &&
	    !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
	    r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
		assert(usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
		    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			unsigned offset;
			struct r600_resource *staging = NULL;

			/* Over-allocate so the staging copy can keep the same
			 * sub-alignment as the destination range. */
			u_upload_alloc(rctx->uploader, 0,
				       box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
				       256, &offset, (struct pipe_resource**)&staging, (void**)&data);

			if (staging) {
				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
								ptransfer, data, staging, offset);
			}
			/* if staging allocation failed, fall through to the plain map */
		} else {
			/* At this point, the buffer is always idle (we checked it above). */
			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
		}
	}
	/* Using a staging buffer in GTT for larger reads is much faster. */
	else if ((usage & PIPE_TRANSFER_READ) &&
		 !(usage & (PIPE_TRANSFER_WRITE |
			    PIPE_TRANSFER_PERSISTENT)) &&
		 rbuffer->domains & RADEON_DOMAIN_VRAM &&
		 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
		struct r600_resource *staging;

		staging = (struct r600_resource*) pipe_buffer_create(
				ctx->screen, 0, PIPE_USAGE_STAGING,
				box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
		if (staging) {
			/* Copy the VRAM buffer to the staging buffer. */
			ctx->resource_copy_region(ctx, &staging->b.b, 0,
						  box->x % R600_MAP_BUFFER_ALIGNMENT,
						  0, 0, resource, level, box);

			data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
			if (!data) {
				r600_resource_reference(&staging, NULL);
				return NULL;
			}
			data += box->x % R600_MAP_BUFFER_ALIGNMENT;

			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
							ptransfer, data, staging, 0);
		}
		/* if staging allocation failed, fall through to the plain map */
	}

	/* Plain map: may wait for the GPU depending on 'usage'. */
	data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
	if (!data) {
		return NULL;
	}
	data += box->x;

	return r600_buffer_get_transfer(ctx, resource, level, usage, box,
					ptransfer, data, NULL, 0);
}
static void init_prog(struct program *p) { struct pipe_surface surf_tmpl; int ret; /* find a hardware device */ ret = pipe_loader_probe(&p->dev, 1); assert(ret); /* init a pipe screen */ p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR); assert(p->screen); /* create the pipe driver context and cso context */ p->pipe = p->screen->context_create(p->screen, NULL); p->cso = cso_create_context(p->pipe); /* set clear color */ p->clear_color.f[0] = 0.3; p->clear_color.f[1] = 0.1; p->clear_color.f[2] = 0.3; p->clear_color.f[3] = 1.0; /* vertex buffer */ { float vertices[4][2][4] = { { { 0.9f, 0.9f, 0.0f, 1.0f }, { 1.0f, 1.0f, 0.0f, 1.0f } }, { { -0.9f, 0.9f, 0.0f, 1.0f }, { 0.0f, 1.0f, 0.0f, 1.0f } }, { { -0.9f, -0.9f, 0.0f, 1.0f }, { 0.0f, 0.0f, 1.0f, 1.0f } }, { { 0.9f, -0.9f, 0.0f, 1.0f }, { 1.0f, 0.0f, 1.0f, 1.0f } } }; p->vbuf = pipe_buffer_create(p->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_DEFAULT, sizeof(vertices)); pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(vertices), vertices); } /* render target texture */ { struct pipe_resource tmplt; memset(&tmplt, 0, sizeof(tmplt)); tmplt.target = PIPE_TEXTURE_2D; tmplt.format = PIPE_FORMAT_B8G8R8A8_UNORM; /* All drivers support this */ tmplt.width0 = WIDTH; tmplt.height0 = HEIGHT; tmplt.depth0 = 1; tmplt.array_size = 1; tmplt.last_level = 0; tmplt.bind = PIPE_BIND_RENDER_TARGET; p->target = p->screen->resource_create(p->screen, &tmplt); } /* sampler texture */ { uint32_t *ptr; struct pipe_transfer *t; struct pipe_resource t_tmplt; struct pipe_sampler_view v_tmplt; struct pipe_box box; memset(&t_tmplt, 0, sizeof(t_tmplt)); t_tmplt.target = PIPE_TEXTURE_2D; t_tmplt.format = PIPE_FORMAT_B8G8R8A8_UNORM; /* All drivers support this */ t_tmplt.width0 = 2; t_tmplt.height0 = 2; t_tmplt.depth0 = 1; t_tmplt.array_size = 1; t_tmplt.last_level = 0; t_tmplt.bind = PIPE_BIND_RENDER_TARGET; p->tex = p->screen->resource_create(p->screen, &t_tmplt); memset(&box, 0, sizeof(box)); box.width = 2; box.height = 2; ptr = 
p->pipe->transfer_map(p->pipe, p->tex, 0, PIPE_TRANSFER_WRITE, &box, &t); ptr[0] = 0xffff0000; ptr[1] = 0xff0000ff; ptr[2] = 0xff00ff00; ptr[3] = 0xffffff00; p->pipe->transfer_unmap(p->pipe, t); u_sampler_view_default_template(&v_tmplt, p->tex, p->tex->format); p->view = p->pipe->create_sampler_view(p->pipe, p->tex, &v_tmplt); } /* disabled blending/masking */ memset(&p->blend, 0, sizeof(p->blend)); p->blend.rt[0].colormask = PIPE_MASK_RGBA; /* no-op depth/stencil/alpha */ memset(&p->depthstencil, 0, sizeof(p->depthstencil)); /* rasterizer */ memset(&p->rasterizer, 0, sizeof(p->rasterizer)); p->rasterizer.cull_face = PIPE_FACE_NONE; p->rasterizer.half_pixel_center = 1; p->rasterizer.bottom_edge_rule = 1; p->rasterizer.depth_clip = 1; /* sampler */ memset(&p->sampler, 0, sizeof(p->sampler)); p->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; p->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; p->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; p->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; p->sampler.min_img_filter = PIPE_TEX_MIPFILTER_LINEAR; p->sampler.mag_img_filter = PIPE_TEX_MIPFILTER_LINEAR; p->sampler.normalized_coords = 1; surf_tmpl.format = PIPE_FORMAT_B8G8R8A8_UNORM; /* All drivers support this */ surf_tmpl.u.tex.level = 0; surf_tmpl.u.tex.first_layer = 0; surf_tmpl.u.tex.last_layer = 0; /* drawing destination */ memset(&p->framebuffer, 0, sizeof(p->framebuffer)); p->framebuffer.width = WIDTH; p->framebuffer.height = HEIGHT; p->framebuffer.nr_cbufs = 1; p->framebuffer.cbufs[0] = p->pipe->create_surface(p->pipe, p->target, &surf_tmpl); /* viewport, depth isn't really needed */ { float x = 0; float y = 0; float z = FAR; float half_width = (float)WIDTH / 2.0f; float half_height = (float)HEIGHT / 2.0f; float half_depth = ((float)FAR - (float)NEAR) / 2.0f; float scale, bias; if (FLIP) { scale = -1.0f; bias = (float)HEIGHT; } else { scale = 1.0f; bias = 0.0f; } p->viewport.scale[0] = half_width; p->viewport.scale[1] = half_height * scale; p->viewport.scale[2] = 
half_depth; p->viewport.translate[0] = half_width + x; p->viewport.translate[1] = (half_height + y) * scale + bias; p->viewport.translate[2] = half_depth + z; } /* vertex elements state */ memset(p->velem, 0, sizeof(p->velem)); p->velem[0].src_offset = 0 * 4 * sizeof(float); /* offset 0, first element */ p->velem[0].instance_divisor = 0; p->velem[0].vertex_buffer_index = 0; p->velem[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; p->velem[1].src_offset = 1 * 4 * sizeof(float); /* offset 16, second element */ p->velem[1].instance_divisor = 0; p->velem[1].vertex_buffer_index = 0; p->velem[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; /* vertex shader */ { const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC }; const uint semantic_indexes[] = { 0, 0 }; p->vs = util_make_vertex_passthrough_shader(p->pipe, 2, semantic_names, semantic_indexes, FALSE); } /* fragment shader */ p->fs = util_make_fragment_tex_shader(p->pipe, TGSI_TEXTURE_2D, TGSI_INTERPOLATE_LINEAR); }
/** * Draw quad with texcoords and optional color. * Coords are gallium window coords with y=0=top. * \param color may be null * \param invertTex if true, flip texcoords vertically */ static void draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z, GLfloat x1, GLfloat y1, const GLfloat *color, GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord) { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */ /* setup vertex data */ { const struct gl_framebuffer *fb = st->ctx->DrawBuffer; const GLfloat fb_width = (GLfloat) fb->Width; const GLfloat fb_height = (GLfloat) fb->Height; const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f; const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; const GLfloat sLeft = 0.0f, sRight = maxXcoord; const GLfloat tTop = invertTex ? maxYcoord : 0.0f; const GLfloat tBot = invertTex ? 
0.0f : maxYcoord; GLuint i; /* upper-left */ verts[0][0][0] = clip_x0; /* v[0].attr[0].x */ verts[0][0][1] = clip_y0; /* v[0].attr[0].y */ /* upper-right */ verts[1][0][0] = clip_x1; verts[1][0][1] = clip_y0; /* lower-right */ verts[2][0][0] = clip_x1; verts[2][0][1] = clip_y1; /* lower-left */ verts[3][0][0] = clip_x0; verts[3][0][1] = clip_y1; verts[0][1][0] = sLeft; /* v[0].attr[1].S */ verts[0][1][1] = tTop; /* v[0].attr[1].T */ verts[1][1][0] = sRight; verts[1][1][1] = tTop; verts[2][1][0] = sRight; verts[2][1][1] = tBot; verts[3][1][0] = sLeft; verts[3][1][1] = tBot; /* same for all verts: */ if (color) { for (i = 0; i < 4; i++) { verts[i][0][2] = z; /* v[i].attr[0].z */ verts[i][0][3] = 1.0f; /* v[i].attr[0].w */ verts[i][2][0] = color[0]; /* v[i].attr[2].r */ verts[i][2][1] = color[1]; /* v[i].attr[2].g */ verts[i][2][2] = color[2]; /* v[i].attr[2].b */ verts[i][2][3] = color[3]; /* v[i].attr[2].a */ verts[i][1][2] = 0.0f; /* v[i].attr[1].R */ verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */ } } else { for (i = 0; i < 4; i++) { verts[i][0][2] = z; /*Z*/ verts[i][0][3] = 1.0f; /*W*/ verts[i][1][2] = 0.0f; /*R*/ verts[i][1][3] = 1.0f; /*Q*/ } } } { struct pipe_resource *buf; /* allocate/load buffer object with vertex data */ buf = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STATIC, sizeof(verts)); pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts); util_draw_vertex_buffer(pipe, st->cso_context, buf, 0, PIPE_PRIM_QUADS, 4, /* verts */ 3); /* attribs/vert */ pipe_resource_reference(&buf, NULL); } }
/*
 * Create and fully initialize a radeonsi pipe_context: common context
 * state, gfx command stream (plus optional constant-engine IBs), border
 * color table, descriptor/state/shader function tables, blitter, and --
 * last, because they emit commands -- the first CS and the backend mask
 * query.  On any failure, unwinds through si_destroy_context().
 */
static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                              void *priv, unsigned flags)
{
	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct si_screen* sscreen = (struct si_screen *)screen;
	struct radeon_winsys *ws = sscreen->b.ws;
	int shader, i;

	if (!sctx)
		return NULL;

	if (sscreen->b.debug_flags & DBG_CHECK_VM)
		flags |= PIPE_CONTEXT_DEBUG;

	if (flags & PIPE_CONTEXT_DEBUG)
		sscreen->record_llvm_ir = true; /* racy but not critical */

	sctx->b.b.screen = screen; /* this must be set first */
	sctx->b.b.priv = priv;
	sctx->b.b.destroy = si_destroy_context;
	sctx->b.b.emit_string_marker = si_emit_string_marker;
	sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;

	if (!r600_common_context_init(&sctx->b, &sscreen->b))
		goto fail;

	if (sscreen->b.info.drm_major == 3)
		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;

	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_cp_dma_functions(sctx);
	si_init_debug_functions(sctx);

	/* Video decode hooks: UVD when available, generic fallbacks otherwise. */
	if (sscreen->b.info.has_uvd) {
		sctx->b.b.create_video_codec = si_uvd_create_decoder;
		sctx->b.b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.b.create_video_codec = vl_create_decoder;
		sctx->b.b.create_video_buffer = vl_video_buffer_create;
	}

	sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, sctx);

	/* Optional constant engine (CE) IBs, unless disabled via debug flag or
	 * unsupported by the winsys. */
	if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) {
		sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
		if (!sctx->ce_ib)
			goto fail;

		if (ws->cs_add_const_preamble_ib) {
			sctx->ce_preamble_ib = ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);
			if (!sctx->ce_preamble_ib)
				goto fail;
		}

		sctx->ce_suballocator = u_suballocator_create(&sctx->b.b, 1024 * 1024,
							      PIPE_BIND_CUSTOM,
							      PIPE_USAGE_DEFAULT, false);
		if (!sctx->ce_suballocator)
			goto fail;
	}

	sctx->b.gfx.flush = si_context_gfx_flush;

	/* Border colors: CPU-side table mirrored into a persistently mapped
	 * GPU buffer. */
	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
					  sizeof(*sctx->border_color_table));
	if (!sctx->border_color_table)
		goto fail;

	sctx->border_color_buffer = (struct r600_resource*)
		pipe_buffer_create(screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DEFAULT,
				   SI_MAX_BORDER_COLORS *
				   sizeof(*sctx->border_color_table));
	if (!sctx->border_color_buffer)
		goto fail;

	sctx->border_color_map =
		ws->buffer_map(sctx->border_color_buffer->buf,
			       NULL, PIPE_TRANSFER_WRITE);
	if (!sctx->border_color_map)
		goto fail;

	si_init_all_descriptors(sctx);
	si_init_state_functions(sctx);
	si_init_shader_functions(sctx);

	if (sctx->b.chip_class >= CIK)
		cik_init_sdma_functions(sctx);
	else
		si_init_dma_functions(sctx);

	if (sscreen->b.debug_flags & DBG_FORCE_DMA)
		sctx->b.b.resource_copy_region = sctx->b.dma_copy;

	sctx->blitter = util_blitter_create(&sctx->b.b);
	if (sctx->blitter == NULL)
		goto fail;
	sctx->blitter->draw_rectangle = r600_draw_rectangle;

	sctx->sample_mask.sample_mask = 0xffff;

	/* these must be last */
	si_begin_new_cs(sctx);
	r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */

	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
	 * with a NULL buffer). We need to use a dummy buffer instead. */
	if (sctx->b.chip_class == CIK) {
		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
								 PIPE_USAGE_DEFAULT, 16);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
							      &sctx->null_const_buf);
			}
		}

		/* Clear the NULL constant buffer, because loads should return zeros. */
		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
				     sctx->null_const_buf.buffer->width0, 0,
				     R600_COHERENCY_SHADER);
	}

	/* NOTE(review): max_threads_per_block is used below without checking
	 * the get_compute_param() return -- presumably it always succeeds for
	 * PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK; verify. */
	uint64_t max_threads_per_block;
	screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
				  PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
				  &max_threads_per_block);

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * sctx->scratch_waves must be >= the maximum posible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	sctx->scratch_waves = MAX2(32 * sscreen->b.info.num_good_compute_units,
				   max_threads_per_block / 64);

	sctx->tm = si_create_llvm_target_machine(sscreen);

	return &sctx->b.b;
fail:
	fprintf(stderr, "radeonsi: Failed to create a context.\n");
	si_destroy_context(&sctx->b.b);
	return NULL;
}
/** * Draw quad with texcoords and optional color. * Coords are window coords with y=0=bottom. * \param color may be null * \param invertTex if true, flip texcoords vertically */ static void draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, GLfloat x1, GLfloat y1, const GLfloat *color, GLboolean invertTex) { struct st_context *st = ctx->st; struct pipe_context *pipe = ctx->st->pipe; GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */ /* setup vertex data */ { const struct gl_framebuffer *fb = st->ctx->DrawBuffer; const GLfloat fb_width = (GLfloat) fb->Width; const GLfloat fb_height = (GLfloat) fb->Height; const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f; const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; const GLfloat sLeft = 0.0f, sRight = 1.0f; const GLfloat tTop = invertTex, tBot = 1.0f - tTop; GLuint tex, i; /* upper-left */ verts[0][0][0] = clip_x0; /* v[0].attr[0].x */ verts[0][0][1] = clip_y0; /* v[0].attr[0].y */ /* upper-right */ verts[1][0][0] = clip_x1; verts[1][0][1] = clip_y0; /* lower-right */ verts[2][0][0] = clip_x1; verts[2][0][1] = clip_y1; /* lower-left */ verts[3][0][0] = clip_x0; verts[3][0][1] = clip_y1; tex = color ? 
2 : 1; verts[0][tex][0] = sLeft; /* v[0].attr[tex].s */ verts[0][tex][1] = tTop; /* v[0].attr[tex].t */ verts[1][tex][0] = sRight; verts[1][tex][1] = tTop; verts[2][tex][0] = sRight; verts[2][tex][1] = tBot; verts[3][tex][0] = sLeft; verts[3][tex][1] = tBot; /* same for all verts: */ if (color) { for (i = 0; i < 4; i++) { verts[i][0][2] = z; /*Z*/ verts[i][0][3] = 1.0f; /*W*/ verts[i][1][0] = color[0]; verts[i][1][1] = color[1]; verts[i][1][2] = color[2]; verts[i][1][3] = color[3]; verts[i][2][2] = 0.0f; /*R*/ verts[i][2][3] = 1.0f; /*Q*/ } } else { for (i = 0; i < 4; i++) { verts[i][0][2] = z; /*Z*/ verts[i][0][3] = 1.0f; /*W*/ verts[i][1][2] = 0.0f; /*R*/ verts[i][1][3] = 1.0f; /*Q*/ } } } { struct pipe_buffer *buf; /* allocate/load buffer object with vertex data */ buf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, sizeof(verts)); st_no_flush_pipe_buffer_write(st, buf, 0, sizeof(verts), verts); util_draw_vertex_buffer(pipe, buf, 0, PIPE_PRIM_QUADS, 4, /* verts */ 3); /* attribs/vert */ pipe_buffer_reference(&buf, NULL); } }