/* Helper function for si_blit_decompress_zs_in_place. */
static void
si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
                                      struct r600_texture *texture,
                                      unsigned planes, unsigned level_mask,
                                      unsigned first_layer, unsigned last_layer)
{
   struct pipe_surface *zsurf, surf_tmpl = {{0}};
   unsigned layer, max_layer, checked_last_layer;
   unsigned fully_decompressed_mask = 0;

   if (!level_mask)
      return;

   if (planes & PIPE_MASK_S)
      sctx->db_flush_stencil_inplace = true;
   if (planes & PIPE_MASK_Z)
      sctx->db_flush_depth_inplace = true;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);

   surf_tmpl.format = texture->resource.b.b.format;

   while (level_mask) {
      unsigned level = u_bit_scan(&level_mask);

      surf_tmpl.u.tex.level = level;

      /* The smaller the mipmap level, the less layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&texture->resource.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;

         zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b,
                                          &surf_tmpl);

         si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
         util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
                                           sctx->custom_dsa_flush, 1.0f);
         si_blitter_end(&sctx->b.b);

         pipe_surface_reference(&zsurf, NULL);
      }

      /* The texture will always be dirty if some layers aren't flushed.
       * I don't think this case occurs often though. */
      if (first_layer == 0 && last_layer == max_layer) {
         fully_decompressed_mask |= 1u << level;
      }
   }

   if (planes & PIPE_MASK_Z)
      texture->dirty_level_mask &= ~fully_decompressed_mask;
   if (planes & PIPE_MASK_S)
      texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;

   sctx->db_flush_depth_inplace = false;
   sctx->db_flush_stencil_inplace = false;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
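/* Every function collected here iterates bit masks with u_bit_scan(), which
 * returns the index of the lowest set bit and clears it from the mask.  A
 * minimal sketch of that idiom, assuming the usual semantics of Mesa's
 * u_math.h helper (not copied from it), would be:
 */
static inline int
u_bit_scan_sketch(unsigned *mask)
{
   const int i = __builtin_ctz(*mask); /* index of lowest set bit; *mask must be non-zero */
   *mask &= ~(1u << i);                /* clear it so the caller's while (mask) loop terminates */
   return i;
}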
static void
update_textures(struct st_context *st,
                enum pipe_shader_type shader_stage,
                const struct gl_program *prog,
                struct pipe_sampler_view **sampler_views,
                unsigned *out_num_textures)
{
   const GLuint old_max = *out_num_textures;
   GLbitfield samplers_used = prog->SamplersUsed;
   GLbitfield texel_fetch_samplers = prog->info.textures_used_by_txf;
   GLbitfield free_slots = ~prog->SamplersUsed;
   GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
   GLuint unit;

   if (samplers_used == 0x0 && old_max == 0)
      return;

   unsigned num_textures = 0;

   /* prog->sh.data is NULL if it's ARB_fragment_program */
   bool glsl130 = (prog->sh.data ? prog->sh.data->Version : 0) >= 130;

   /* loop over sampler units (aka tex image units) */
   for (unit = 0; samplers_used || unit < old_max;
        unit++, samplers_used >>= 1, texel_fetch_samplers >>= 1) {
      struct pipe_sampler_view *sampler_view = NULL;

      if (samplers_used & 1) {
         const GLuint texUnit = prog->SamplerUnits[unit];

         /* The EXT_texture_sRGB_decode extension says:
          *
          *    "The conversion of sRGB color space components to linear color
          *     space is always performed if the texel lookup function is one
          *     of the texelFetch builtin functions.
          *
          *     Otherwise, if the texel lookup function is one of the texture
          *     builtin functions or one of the texture gather functions, the
          *     conversion of sRGB color space components to linear color space
          *     is controlled by the TEXTURE_SRGB_DECODE_EXT parameter.
          *
          *     If the TEXTURE_SRGB_DECODE_EXT parameter is DECODE_EXT, the
          *     conversion of sRGB color space components to linear color space
          *     is performed.
          *
          *     If the TEXTURE_SRGB_DECODE_EXT parameter is SKIP_DECODE_EXT,
          *     the value is returned without decoding. However, if the texture
          *     is also [statically] accessed with a texelFetch function, then
          *     the result of texture builtin functions and/or texture gather
          *     functions may be returned with decoding or without decoding."
          *
          * Note: the "statically" will be added to the language per
          *       https://cvs.khronos.org/bugzilla/show_bug.cgi?id=14934
          *
          * So we simply ignore the setting entirely for samplers that are
          * (statically) accessed with a texelFetch function.
          */
         st_update_single_texture(st, &sampler_view, texUnit, glsl130,
                                  texel_fetch_samplers & 1);
         num_textures = unit + 1;
      }

      pipe_sampler_view_reference(&(sampler_views[unit]), sampler_view);
   }

   /* For any external samplers with multiplanar YUV, stuff the additional
    * sampler views we need at the end.
    *
    * Trying to cache the sampler view in the stObj looks painful, so just
    * re-create the sampler view for the extra planes each time.  Main use
    * case is video playback (ie. fps games wouldn't be using this) so I
    * guess no point to try to optimize this feature.
    */
   while (unlikely(external_samplers_used)) {
      GLuint unit = u_bit_scan(&external_samplers_used);
      GLuint extra = 0;
      struct st_texture_object *stObj =
         st_get_texture_object(st->ctx, prog, unit);
      struct pipe_sampler_view tmpl;

      if (!stObj)
         continue;

      /* use original view as template: */
      tmpl = *sampler_views[unit];

      switch (st_get_view_format(stObj)) {
      case PIPE_FORMAT_NV12:
         /* we need one additional R8G8 view: */
         tmpl.format = PIPE_FORMAT_RG88_UNORM;
         tmpl.swizzle_g = PIPE_SWIZZLE_Y;   /* tmpl from Y plane is R8 */
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
            st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
         break;
      case PIPE_FORMAT_IYUV:
         /* we need two additional R8 views: */
         tmpl.format = PIPE_FORMAT_R8_UNORM;
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
            st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
            st->pipe->create_sampler_view(st->pipe, stObj->pt->next->next, &tmpl);
         break;
      default:
         break;
      }

      num_textures = MAX2(num_textures, extra + 1);
   }

   cso_set_sampler_views(st->cso_context, shader_stage, num_textures,
                         sampler_views);
   *out_num_textures = num_textures;
}
static void
update_textures(struct st_context *st,
                gl_shader_stage mesa_shader,
                const struct gl_program *prog,
                unsigned max_units,
                struct pipe_sampler_view **sampler_views,
                unsigned *num_textures)
{
   const GLuint old_max = *num_textures;
   GLbitfield samplers_used = prog->SamplersUsed;
   GLbitfield free_slots = ~prog->SamplersUsed;
   GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
   GLuint unit;
   enum pipe_shader_type shader_stage = st_shader_stage_to_ptarget(mesa_shader);

   if (samplers_used == 0x0 && old_max == 0)
      return;

   *num_textures = 0;

   /* loop over sampler units (aka tex image units) */
   for (unit = 0; unit < max_units; unit++, samplers_used >>= 1) {
      struct pipe_sampler_view *sampler_view = NULL;

      if (samplers_used & 1) {
         /* prog->sh.data is NULL if it's ARB_fragment_program */
         unsigned glsl_version = prog->sh.data ? prog->sh.data->Version : 0;
         const GLuint texUnit = prog->SamplerUnits[unit];
         GLboolean retval;

         retval = update_single_texture(st, &sampler_view, texUnit,
                                        glsl_version);
         if (retval == GL_FALSE)
            continue;

         *num_textures = unit + 1;
      }
      else if (samplers_used == 0 && unit >= old_max) {
         /* if we've reset all the old views and we have no more new ones */
         break;
      }

      pipe_sampler_view_reference(&(sampler_views[unit]), sampler_view);
   }

   /* For any external samplers with multiplanar YUV, stuff the additional
    * sampler views we need at the end.
    *
    * Trying to cache the sampler view in the stObj looks painful, so just
    * re-create the sampler view for the extra planes each time.  Main use
    * case is video playback (ie. fps games wouldn't be using this) so I
    * guess no point to try to optimize this feature.
    */
   while (unlikely(external_samplers_used)) {
      GLuint unit = u_bit_scan(&external_samplers_used);
      GLuint extra = 0;
      struct st_texture_object *stObj =
         st_get_texture_object(st->ctx, prog, unit);
      struct pipe_sampler_view tmpl;

      if (!stObj)
         continue;

      /* use original view as template: */
      tmpl = *sampler_views[unit];

      switch (st_get_view_format(stObj)) {
      case PIPE_FORMAT_NV12:
         /* we need one additional R8G8 view: */
         tmpl.format = PIPE_FORMAT_RG88_UNORM;
         tmpl.swizzle_g = PIPE_SWIZZLE_Y;   /* tmpl from Y plane is R8 */
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
            st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
         break;
      case PIPE_FORMAT_IYUV:
         /* we need two additional R8 views: */
         tmpl.format = PIPE_FORMAT_R8_UNORM;
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
            st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
            st->pipe->create_sampler_view(st->pipe, stObj->pt->next->next, &tmpl);
         break;
      default:
         break;
      }

      *num_textures = MAX2(*num_textures, extra + 1);
   }

   cso_set_sampler_views(st->cso_context, shader_stage, *num_textures,
                         sampler_views);
}
/**
 * Gathers together all the uniform values into a block of memory to be
 * uploaded into the CURBE, then emits the state packet telling the hardware
 * the new location.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_CURBE_OFFSETS */
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   gl_constant_value *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   if (sz == 0) {
      goto emit;
   }

   buf = intel_upload_space(brw, bufsz, 64,
                            &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);

   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      _mesa_load_state_parameters(ctx, brw->fragment_program->Parameters);

      /* BRW_NEW_CURBE_OFFSETS */
      GLuint offset = brw->curbe.wm_start * 16;

      /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->wm.base.prog_data->nr_params; i++) {
         buf[offset + i] = *brw->wm.base.prog_data->param[i];
      }
   }

   /* clipper constants */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLbitfield mask;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
         buf[offset + i * 4 + 0].f = fixed_plane[i][0];
         buf[offset + i * 4 + 1].f = fixed_plane[i][1];
         buf[offset + i * 4 + 2].f = fixed_plane[i][2];
         buf[offset + i * 4 + 3].f = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      clip_planes = brw_select_clip_planes(ctx);
      mask = ctx->Transform.ClipPlanesEnabled;
      while (mask) {
         const int j = u_bit_scan(&mask);
         buf[offset + i * 4 + 0].f = clip_planes[j][0];
         buf[offset + i * 4 + 1].f = clip_planes[j][1];
         buf[offset + i * 4 + 2].f = clip_planes[j][2];
         buf[offset + i * 4 + 3].f = clip_planes[j][3];
         i++;
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      _mesa_load_state_parameters(ctx, brw->vertex_program->Parameters);

      GLuint offset = brw->curbe.vs_start * 16;

      /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->vs.base.prog_data->nr_params; i++) {
         buf[offset + i] = *brw->vs.base.prog_data->param[i];
      }
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4)
         fprintf(stderr, "curbe %d.%d: %f %f %f %f\n",
                 i/8, i&4,
                 buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
   }

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */

emit:
   /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
    * (CONSTANT_BUFFER (CURBE Load)):
    *
    *     "Modifying the CS URB allocation via URB_FENCE invalidates any
    *      previous CURBE entries. Therefore software must subsequently
    *      [re]issue a CONSTANT_BUFFER command before CURBE data can be used
    *      in the pipeline."
    */
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();

   /* Work around a Broadwater/Crestline depth interpolator bug.  The
    * following sequence will cause GPU hangs:
    *
    * 1. Change state so that all depth related fields in CC_STATE are
    *    disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
    * 2. Emit a CONSTANT_BUFFER packet.
    * 3. Draw via 3DPRIMITIVE.
    *
    * The recommended workaround is to emit a non-pipelined state change after
    * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
    *
    * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),
    * and always emit it when "PS Use Source Depth" is set.  We could be more
    * precise, but the additional complexity is probably not worth it.
    *
    * BRW_NEW_FRAGMENT_PROGRAM
    */
   if (brw->gen == 4 && !brw->is_g4x &&
       (brw->fragment_program->info.inputs_read & (1 << VARYING_SLOT_POS))) {
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
static enum pipe_error
emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
{
   enum pipe_error ret;
   unsigned dirty_constbufs;
   unsigned enabled_constbufs;

   /* Emit 0th constant buffer (with extra constants) */
   ret = emit_constbuf_vgpu10(svga, shader);
   if (ret != PIPE_OK) {
      return ret;
   }

   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;

   /* Emit other constant buffers (UBOs) */
   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;

   while (dirty_constbufs) {
      unsigned index = u_bit_scan(&dirty_constbufs);
      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
      struct svga_buffer *buffer =
         svga_buffer(svga->curr.constbufs[shader][index].buffer);
      struct svga_winsys_surface *handle;

      if (buffer) {
         handle = svga_buffer_handle(svga, &buffer->b.b);
         enabled_constbufs |= 1 << index;
      }
      else {
         handle = NULL;
         enabled_constbufs &= ~(1 << index);
         assert(offset == 0);
         assert(size == 0);
      }

      if (size % 16 != 0) {
         /* GL's buffer range sizes can be any number of bytes but the
          * SVGA3D device requires a multiple of 16 bytes.
          */
         const unsigned total_size = buffer->b.b.width0;

         if (offset + align(size, 16) <= total_size) {
            /* round up size to multiple of 16 */
            size = align(size, 16);
         }
         else {
            /* round down to multiple of 16 (this may cause rendering problems
             * but should avoid a device error).
             */
            size &= ~15;
         }
      }

      assert(size % 16 == 0);

      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                                  index,
                                                  svga_shader_type(shader),
                                                  handle,
                                                  offset,
                                                  size);
      if (ret != PIPE_OK)
         return ret;

      svga->hud.num_const_buf_updates++;
   }

   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
   svga->state.dirty_constbufs[shader] = 0;

   return ret;
}
/**
 * Update the gallium driver's sampler state for fragment, vertex or
 * geometry shader stage.
 */
static void
update_shader_samplers(struct st_context *st,
                       enum pipe_shader_type shader_stage,
                       const struct gl_program *prog,
                       unsigned max_units,
                       struct pipe_sampler_state *samplers,
                       unsigned *num_samplers)
{
   GLbitfield samplers_used = prog->SamplersUsed;
   GLbitfield free_slots = ~prog->SamplersUsed;
   GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
   GLuint unit;
   const GLuint old_max = *num_samplers;
   const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS];

   if (*num_samplers == 0 && samplers_used == 0x0)
      return;

   *num_samplers = 0;

   /* loop over sampler units (aka tex image units) */
   for (unit = 0; unit < max_units; unit++, samplers_used >>= 1) {
      struct pipe_sampler_state *sampler = samplers + unit;

      if (samplers_used & 1) {
         const GLuint texUnit = prog->SamplerUnits[unit];

         convert_sampler(st, sampler, texUnit);
         states[unit] = sampler;
         *num_samplers = unit + 1;
      }
      else if (samplers_used != 0 || unit < old_max) {
         states[unit] = NULL;
      }
      else {
         /* if we've reset all the old samplers and we have no more new ones */
         break;
      }
   }

   /* For any external samplers with multiplanar YUV, stuff the additional
    * sampler states we need at the end.
    *
    * Just re-use the existing sampler-state from the primary slot.
    */
   while (unlikely(external_samplers_used)) {
      GLuint unit = u_bit_scan(&external_samplers_used);
      GLuint extra = 0;
      struct st_texture_object *stObj =
         st_get_texture_object(st->ctx, prog, unit);
      struct pipe_sampler_state *sampler = samplers + unit;

      if (!stObj)
         continue;

      switch (st_get_view_format(stObj)) {
      case PIPE_FORMAT_NV12:
         /* we need one additional sampler: */
         extra = u_bit_scan(&free_slots);
         states[extra] = sampler;
         break;
      case PIPE_FORMAT_IYUV:
         /* we need two additional samplers: */
         extra = u_bit_scan(&free_slots);
         states[extra] = sampler;
         extra = u_bit_scan(&free_slots);
         states[extra] = sampler;
         break;
      default:
         break;
      }

      *num_samplers = MAX2(*num_samplers, extra + 1);
   }

   cso_set_samplers(st->cso_context, shader_stage, *num_samplers, states);
}
static void
make_state_key( struct gl_context *ctx, struct state_key *key )
{
   const struct gl_program *fp = ctx->FragmentProgram._Current;
   GLbitfield mask;

   memset(key, 0, sizeof(struct state_key));

   /* This now relies on texenvprogram.c being active:
    */
   assert(fp);

   key->need_eye_coords = ctx->_NeedEyeCoords;

   key->fragprog_inputs_read = fp->info.inputs_read;
   key->varying_vp_inputs = ctx->varying_vp_inputs;

   if (ctx->RenderMode == GL_FEEDBACK) {
      /* make sure the vertprog emits color and tex0 */
      key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0);
   }

   key->separate_specular = (ctx->Light.Model.ColorControl ==
                             GL_SEPARATE_SPECULAR_COLOR);

   if (ctx->Light.Enabled) {
      key->light_global_enabled = 1;

      if (ctx->Light.Model.LocalViewer)
         key->light_local_viewer = 1;

      if (ctx->Light.Model.TwoSide)
         key->light_twoside = 1;

      if (ctx->Light.ColorMaterialEnabled) {
         key->light_color_material_mask = ctx->Light._ColorMaterialBitmask;
      }

      mask = ctx->Light._EnabledLights;
      while (mask) {
         const int i = u_bit_scan(&mask);
         struct gl_light *light = &ctx->Light.Light[i];

         key->unit[i].light_enabled = 1;

         if (light->EyePosition[3] == 0.0F)
            key->unit[i].light_eyepos3_is_zero = 1;

         if (light->SpotCutoff == 180.0F)
            key->unit[i].light_spotcutoff_is_180 = 1;

         if (light->ConstantAttenuation != 1.0F ||
             light->LinearAttenuation != 0.0F ||
             light->QuadraticAttenuation != 0.0F)
            key->unit[i].light_attenuated = 1;
      }

      if (check_active_shininess(ctx, key, 0)) {
         key->material_shininess_is_zero = 0;
      }
      else if (key->light_twoside &&
               check_active_shininess(ctx, key, 1)) {
         key->material_shininess_is_zero = 0;
      }
      else {
         key->material_shininess_is_zero = 1;
      }
   }

   if (ctx->Transform.Normalize)
      key->normalize = 1;

   if (ctx->Transform.RescaleNormals)
      key->rescale_normals = 1;

   if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) {
      key->fog_source_is_depth = 1;
      key->fog_distance_mode =
         translate_fog_distance_mode(ctx->Fog.FogDistanceMode);
   }

   if (ctx->Point._Attenuated)
      key->point_attenuated = 1;

   if (ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled)
      key->point_array = 1;

   if (ctx->Texture._TexGenEnabled ||
       ctx->Texture._TexMatEnabled ||
       ctx->Texture._MaxEnabledTexImageUnit != -1)
      key->texture_enabled_global = 1;

   mask = ctx->Texture._EnabledCoordUnits | ctx->Texture._TexGenEnabled
      | ctx->Texture._TexMatEnabled | ctx->Point.CoordReplace;
   while (mask) {
      const int i = u_bit_scan(&mask);
      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];

      if (texUnit->_Current)
         key->unit[i].texunit_really_enabled = 1;

      if (ctx->Point.PointSprite)
         if (ctx->Point.CoordReplace & (1u << i))
            key->unit[i].coord_replace = 1;

      if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
         key->unit[i].texmat_enabled = 1;

      if (texUnit->TexGenEnabled) {
         key->unit[i].texgen_enabled = 1;

         key->unit[i].texgen_mode0 =
            translate_texgen( texUnit->TexGenEnabled & (1<<0),
                              texUnit->GenS.Mode );
         key->unit[i].texgen_mode1 =
            translate_texgen( texUnit->TexGenEnabled & (1<<1),
                              texUnit->GenT.Mode );
         key->unit[i].texgen_mode2 =
            translate_texgen( texUnit->TexGenEnabled & (1<<2),
                              texUnit->GenR.Mode );
         key->unit[i].texgen_mode3 =
            translate_texgen( texUnit->TexGenEnabled & (1<<3),
                              texUnit->GenQ.Mode );
      }
   }
}
static void
gather_intrinsic_info(const nir_intrinsic_instr *instr,
                      struct ac_shader_info *info)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_interp_var_at_sample:
      info->ps.needs_sample_positions = true;
      break;
   case nir_intrinsic_load_draw_id:
      info->vs.needs_draw_id = true;
      break;
   case nir_intrinsic_load_instance_id:
      info->vs.needs_instance_id = true;
      break;
   case nir_intrinsic_load_num_work_groups:
      info->cs.uses_grid_size = true;
      break;
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_work_group_id: {
      unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
      while (mask) {
         unsigned i = u_bit_scan(&mask);

         if (instr->intrinsic == nir_intrinsic_load_work_group_id)
            info->cs.uses_block_id[i] = true;
         else
            info->cs.uses_thread_id[i] = true;
      }
      break;
   }
   case nir_intrinsic_load_local_invocation_index:
      info->cs.uses_local_invocation_idx = true;
      break;
   case nir_intrinsic_load_sample_id:
      info->ps.force_persample = true;
      break;
   case nir_intrinsic_load_sample_pos:
      info->ps.force_persample = true;
      break;
   case nir_intrinsic_load_view_index:
      info->needs_multiview_view_index = true;
      break;
   case nir_intrinsic_load_invocation_id:
      info->uses_invocation_id = true;
      break;
   case nir_intrinsic_load_primitive_id:
      info->uses_prim_id = true;
      break;
   case nir_intrinsic_load_push_constant:
      info->loads_push_constants = true;
      break;
   case nir_intrinsic_vulkan_resource_index:
      info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
      break;
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_min:
   case nir_intrinsic_image_atomic_max:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_size: {
      const struct glsl_type *type = instr->variables[0]->var->type;
      if (instr->variables[0]->deref.child)
         type = instr->variables[0]->deref.child->type;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
      if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
          dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
         info->ps.uses_input_attachments = true;
      mark_sampler_desc(instr->variables[0]->var, info);
      break;
   }
   default:
      break;
   }
}
static bool
ac_eliminate_duplicated_output(uint8_t *vs_output_param_offset,
                               uint32_t num_outputs,
                               struct ac_vs_exports *processed,
                               struct ac_vs_exp_inst *exp)
{
   unsigned p, copy_back_channels = 0;

   /* See if the output is already in the list of processed outputs.
    * The LLVMValueRef comparison relies on SSA.
    */
   for (p = 0; p < processed->num; p++) {
      bool different = false;

      for (unsigned j = 0; j < 4; j++) {
         struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
         struct ac_vs_exp_chan *c2 = &exp->chan[j];

         /* Treat undef as a match. */
         if (c2->type == AC_IR_UNDEF)
            continue;

         /* If c1 is undef but c2 isn't, we can copy c2 to c1
          * and consider the instruction duplicated.
          */
         if (c1->type == AC_IR_UNDEF) {
            copy_back_channels |= 1 << j;
            continue;
         }

         /* Test whether the channels are not equal. */
         if (c1->type != c2->type ||
             (c1->type == AC_IR_CONST &&
              c1->const_float != c2->const_float) ||
             (c1->type == AC_IR_VALUE &&
              c1->value != c2->value)) {
            different = true;
            break;
         }
      }
      if (!different)
         break;

      copy_back_channels = 0;
   }
   if (p == processed->num)
      return false;

   /* If a match was found, but the matching export has undef where the new
    * one has a normal value, copy the normal value to the undef channel.
    */
   struct ac_vs_exp_inst *match = &processed->exp[p];

   while (copy_back_channels) {
      unsigned chan = u_bit_scan(&copy_back_channels);

      assert(match->chan[chan].type == AC_IR_UNDEF);
      LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
                     exp->chan[chan].value);
      match->chan[chan] = exp->chan[chan];
   }

   /* The PARAM export is duplicated. Kill it. */
   LLVMInstructionEraseFromParent(exp->inst);

   /* Change OFFSET to the matching export. */
   for (unsigned i = 0; i < num_outputs; i++) {
      if (vs_output_param_offset[i] == exp->offset) {
         vs_output_param_offset[i] = match->offset;
         break;
      }
   }
   return true;
}
/**
 * Mark all states that have the resource dirty.
 */
void
ilo_mark_states_with_resource_dirty(struct ilo_context *ilo,
                                    const struct pipe_resource *res)
{
   uint32_t states = 0;
   unsigned sh, i;

   if (res->target == PIPE_BUFFER) {
      uint32_t vb_mask = ilo->vb.enabled_mask;

      while (vb_mask) {
         const unsigned idx = u_bit_scan(&vb_mask);
         if (ilo->vb.states[idx].buffer == res) {
            states |= ILO_DIRTY_VB;
            break;
         }
      }

      if (ilo->ib.buffer == res) {
         states |= ILO_DIRTY_IB;

         /*
          * finalize_index_buffer() has an optimization that clears
          * ILO_DIRTY_IB when the HW states do not change.  However, it fails
          * to flush the VF cache when the HW states do not change, but the
          * contents of the IB has changed.  Here, we set the index size to an
          * invalid value to avoid the optimization.
          */
         ilo->ib.hw_index_size = 0;
      }

      for (i = 0; i < ilo->so.count; i++) {
         if (ilo->so.states[i]->buffer == res) {
            states |= ILO_DIRTY_SO;
            break;
         }
      }
   }

   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
      for (i = 0; i < ilo->view[sh].count; i++) {
         struct pipe_sampler_view *view = ilo->view[sh].states[i];

         if (view->texture == res) {
            static const unsigned view_dirty_bits[PIPE_SHADER_TYPES] = {
               [PIPE_SHADER_VERTEX]   = ILO_DIRTY_VIEW_VS,
               [PIPE_SHADER_FRAGMENT] = ILO_DIRTY_VIEW_FS,
               [PIPE_SHADER_GEOMETRY] = ILO_DIRTY_VIEW_GS,
               [PIPE_SHADER_COMPUTE]  = ILO_DIRTY_VIEW_CS,
            };

            states |= view_dirty_bits[sh];
            break;
         }
      }

      if (res->target == PIPE_BUFFER) {
         for (i = 0; i < Elements(ilo->cbuf[sh].cso); i++) {
            struct ilo_cbuf_cso *cbuf = &ilo->cbuf[sh].cso[i];

            if (cbuf->resource == res) {
               states |= ILO_DIRTY_CBUF;
               break;
            }
         }
      }
   }

   /* apply the accumulated dirty bits to the context */
   ilo->dirty |= states;
}
static void *r600_buffer_transfer_map(struct pipe_context *pipe,
                                      struct pipe_transfer *transfer)
{
   struct r600_resource *rbuffer = r600_resource(transfer->resource);
   struct r600_context *rctx = (struct r600_context*)pipe;
   uint8_t *data;

   if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
       !(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      assert(transfer->usage & PIPE_TRANSFER_WRITE);

      /* Check if mapping this buffer would cause waiting for the GPU. */
      if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf,
                                            RADEON_USAGE_READWRITE) ||
          rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
         unsigned i, mask;

         /* Discard the buffer. */
         pb_reference(&rbuffer->buf, NULL);

         /* Create a new one in the same pipe_resource. */
         /* XXX We probably want a different alignment for buffers and textures. */
         r600_init_resource(rctx->screen, rbuffer, rbuffer->b.b.width0, 4096,
                            rbuffer->b.b.bind, rbuffer->b.b.usage);

         /* We changed the buffer, now we need to bind it where the old one was bound. */
         /* Vertex buffers. */
         mask = rctx->vertex_buffer_state.enabled_mask;
         while (mask) {
            i = u_bit_scan(&mask);
            if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
               rctx->vertex_buffer_state.dirty_mask |= 1 << i;
               r600_vertex_buffers_dirty(rctx);
            }
         }
         /* Streamout buffers. */
         for (i = 0; i < rctx->num_so_targets; i++) {
            if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
               r600_context_streamout_end(rctx);
               rctx->streamout_start = TRUE;
               rctx->streamout_append_bitmask = ~0;
            }
         }
         /* Constant buffers. */
         r600_set_constants_dirty_if_bound(rctx, rbuffer);
      }
   }
#if 0 /* this is broken (see Bug 53130) */
   else if ((transfer->usage & PIPE_TRANSFER_DISCARD_RANGE) &&
            !(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
            rctx->screen->has_streamout &&
            /* The buffer range must be aligned to 4. */
            transfer->box.x % 4 == 0 && transfer->box.width % 4 == 0) {
      assert(transfer->usage & PIPE_TRANSFER_WRITE);

      /* Check if mapping this buffer would cause waiting for the GPU. */
      if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf,
                                            RADEON_USAGE_READWRITE) ||
          rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
         /* Do a wait-free write-only transfer using a temporary buffer. */
         struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;

         rtransfer->staging = (struct r600_resource*)
            pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
                               PIPE_USAGE_STAGING, transfer->box.width);
         return rctx->ws->buffer_map(rtransfer->staging->cs_buf, rctx->cs,
                                     PIPE_TRANSFER_WRITE);
      }
   }
#endif

   data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, transfer->usage);
   if (!data)
      return NULL;

   return (uint8_t*)data + transfer->box.x;
}
static void
nouveau_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
{
   GLbitfield mask;

   switch (cap) {
   case GL_ALPHA_TEST:
      context_dirty(ctx, ALPHA_FUNC);
      break;
   case GL_BLEND:
      context_dirty(ctx, BLEND_EQUATION);
      break;
   case GL_COLOR_LOGIC_OP:
      context_dirty(ctx, LOGIC_OPCODE);
      break;
   case GL_COLOR_MATERIAL:
      context_dirty(ctx, COLOR_MATERIAL);
      context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
      context_dirty(ctx, MATERIAL_BACK_AMBIENT);
      context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
      context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
      context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
      context_dirty(ctx, MATERIAL_BACK_SPECULAR);
      break;
   case GL_COLOR_SUM_EXT:
      context_dirty(ctx, FRAG);
      context_dirty(ctx, LIGHT_MODEL);
      break;
   case GL_CULL_FACE:
      context_dirty(ctx, CULL_FACE);
      break;
   case GL_DEPTH_TEST:
      context_dirty(ctx, DEPTH);
      break;
   case GL_DITHER:
      context_dirty(ctx, DITHER);
      break;
   case GL_FOG:
      context_dirty(ctx, FOG);
      context_dirty(ctx, FRAG);
      context_dirty(ctx, MODELVIEW);
      break;
   case GL_LIGHT0:
   case GL_LIGHT1:
   case GL_LIGHT2:
   case GL_LIGHT3:
   case GL_LIGHT4:
   case GL_LIGHT5:
   case GL_LIGHT6:
   case GL_LIGHT7:
      context_dirty(ctx, MODELVIEW);
      context_dirty(ctx, LIGHT_ENABLE);
      context_dirty_i(ctx, LIGHT_SOURCE, cap - GL_LIGHT0);
      context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
      context_dirty(ctx, MATERIAL_BACK_AMBIENT);
      context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
      context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
      context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
      context_dirty(ctx, MATERIAL_BACK_SPECULAR);
      context_dirty(ctx, MATERIAL_FRONT_SHININESS);
      context_dirty(ctx, MATERIAL_BACK_SHININESS);
      break;
   case GL_LIGHTING:
      context_dirty(ctx, FRAG);
      context_dirty(ctx, MODELVIEW);
      context_dirty(ctx, LIGHT_MODEL);
      context_dirty(ctx, LIGHT_ENABLE);

      mask = ctx->Light._EnabledLights;
      while (mask) {
         const int i = u_bit_scan(&mask);
         context_dirty_i(ctx, LIGHT_SOURCE, i);
      }

      context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
      context_dirty(ctx, MATERIAL_BACK_AMBIENT);
      context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
      context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
      context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
      context_dirty(ctx, MATERIAL_BACK_SPECULAR);
      context_dirty(ctx, MATERIAL_FRONT_SHININESS);
      context_dirty(ctx, MATERIAL_BACK_SHININESS);
      break;
   case GL_LINE_SMOOTH:
      context_dirty(ctx, LINE_MODE);
      break;
   case GL_NORMALIZE:
      context_dirty(ctx, LIGHT_ENABLE);
      break;
   case GL_POINT_SMOOTH:
      context_dirty(ctx, POINT_MODE);
      break;
   case GL_POLYGON_OFFSET_POINT:
   case GL_POLYGON_OFFSET_LINE:
   case GL_POLYGON_OFFSET_FILL:
      context_dirty(ctx, POLYGON_OFFSET);
      break;
   case GL_POLYGON_SMOOTH:
      context_dirty(ctx, POLYGON_MODE);
      break;
   case GL_SCISSOR_TEST:
      context_dirty(ctx, SCISSOR);
      break;
   case GL_STENCIL_TEST:
      context_dirty(ctx, STENCIL_FUNC);
      break;
   case GL_TEXTURE_1D:
   case GL_TEXTURE_2D:
   case GL_TEXTURE_3D:
   case GL_TEXTURE_RECTANGLE:
      context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
      context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
      break;
   case GL_TEXTURE_GEN_S:
   case GL_TEXTURE_GEN_T:
   case GL_TEXTURE_GEN_R:
   case GL_TEXTURE_GEN_Q:
      context_dirty_i(ctx, TEX_GEN, ctx->Texture.CurrentUnit);
      context_dirty(ctx, MODELVIEW);
      break;
   }
}
static void si_blit_decompress_color(struct pipe_context *ctx,
                                     struct r600_texture *rtex,
                                     unsigned first_level, unsigned last_level,
                                     unsigned first_layer, unsigned last_layer,
                                     bool need_dcc_decompress)
{
   struct si_context *sctx = (struct si_context *)ctx;
   void* custom_blend;
   unsigned layer, checked_last_layer, max_layer;
   unsigned level_mask =
      u_bit_consecutive(first_level, last_level - first_level + 1);

   if (!need_dcc_decompress)
      level_mask &= rtex->dirty_level_mask;
   if (!level_mask)
      return;

   if (rtex->dcc_offset && need_dcc_decompress) {
      custom_blend = sctx->custom_blend_dcc_decompress;

      /* disable levels without DCC */
      for (int i = first_level; i <= last_level; i++) {
         if (!rtex->dcc_offset ||
             !rtex->surface.level[i].dcc_enabled)
            level_mask &= ~(1 << i);
      }
   } else if (rtex->fmask.size) {
      custom_blend = sctx->custom_blend_decompress;
   } else {
      custom_blend = sctx->custom_blend_fastclear;
   }

   while (level_mask) {
      unsigned level = u_bit_scan(&level_mask);

      /* The smaller the mipmap level, the less layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&rtex->resource.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         struct pipe_surface *cbsurf, surf_tmpl;

         surf_tmpl.format = rtex->resource.b.b.format;
         surf_tmpl.u.tex.level = level;
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;
         cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);

         si_blitter_begin(ctx, SI_DECOMPRESS);
         util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
         si_blitter_end(ctx);

         pipe_surface_reference(&cbsurf, NULL);
      }

      /* The texture will always be dirty if some layers aren't flushed.
       * I don't think this case occurs often though. */
      if (first_layer == 0 && last_layer == max_layer) {
         rtex->dirty_level_mask &= ~(1 << level);
      }
   }
}
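/* si_blit_decompress_color() builds its level mask with u_bit_consecutive().
 * As a rough sketch of that helper's behaviour (an assumption about its
 * semantics, not a copy of Mesa's u_math.h): it sets `bits` consecutive bits
 * starting at `start`, e.g. start = first_level and
 * bits = last_level - first_level + 1 above.
 */
static inline unsigned
u_bit_consecutive_sketch(unsigned start, unsigned bits)
{
   /* special-case 32 to avoid an undefined full-width shift */
   return bits == 32 ? ~0u : ((1u << bits) - 1) << start;
}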
static unsigned si_blit_dbcb_copy(struct si_context *sctx,
                                  struct r600_texture *src,
                                  struct r600_texture *dst,
                                  unsigned planes, unsigned level_mask,
                                  unsigned first_layer, unsigned last_layer,
                                  unsigned first_sample, unsigned last_sample)
{
   struct pipe_surface surf_tmpl = {{0}};
   unsigned layer, sample, checked_last_layer, max_layer;
   unsigned fully_copied_levels = 0;

   if (planes & PIPE_MASK_Z)
      sctx->dbcb_depth_copy_enabled = true;
   if (planes & PIPE_MASK_S)
      sctx->dbcb_stencil_copy_enabled = true;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);

   assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

   while (level_mask) {
      unsigned level = u_bit_scan(&level_mask);

      /* The smaller the mipmap level, the less layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&src->resource.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      surf_tmpl.u.tex.level = level;

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         struct pipe_surface *zsurf, *cbsurf;

         surf_tmpl.format = src->resource.b.b.format;
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;

         zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b,
                                          &surf_tmpl);

         surf_tmpl.format = dst->resource.b.b.format;
         cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b,
                                           &surf_tmpl);

         for (sample = first_sample; sample <= last_sample; sample++) {
            if (sample != sctx->dbcb_copy_sample) {
               sctx->dbcb_copy_sample = sample;
               si_mark_atom_dirty(sctx, &sctx->db_render_state);
            }

            si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
            util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf,
                                              1 << sample,
                                              sctx->custom_dsa_flush, 1.0f);
            si_blitter_end(&sctx->b.b);
         }

         pipe_surface_reference(&zsurf, NULL);
         pipe_surface_reference(&cbsurf, NULL);
      }

      if (first_layer == 0 && last_layer >= max_layer &&
          first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
         fully_copied_levels |= 1u << level;
   }

   sctx->dbcb_depth_copy_enabled = false;
   sctx->dbcb_stencil_copy_enabled = false;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);

   return fully_copied_levels;
}
void st_validate_state( struct st_context *st, enum st_pipeline pipeline )
{
   struct gl_context *ctx = st->ctx;
   uint64_t dirty, pipeline_mask;
   uint32_t dirty_lo, dirty_hi;

   /* Get Mesa driver state.
    *
    * Inactive states are shader states not used by shaders at the moment.
    */
   st->dirty |= ctx->NewDriverState & st->active_states & ST_ALL_STATES_MASK;
   ctx->NewDriverState = 0;

   /* Get pipeline state. */
   switch (pipeline) {
   case ST_PIPELINE_RENDER:
      if (st->ctx->API == API_OPENGL_COMPAT)
         check_attrib_edgeflag(st);

      if (st->gfx_shaders_may_be_dirty) {
         check_program_state(st);
         st->gfx_shaders_may_be_dirty = false;
      }

      st_manager_validate_framebuffers(st);

      pipeline_mask = ST_PIPELINE_RENDER_STATE_MASK;
      break;

   case ST_PIPELINE_CLEAR:
      st_manager_validate_framebuffers(st);
      pipeline_mask = ST_PIPELINE_CLEAR_STATE_MASK;
      break;

   case ST_PIPELINE_META:
      if (st->gfx_shaders_may_be_dirty) {
         check_program_state(st);
         st->gfx_shaders_may_be_dirty = false;
      }

      st_manager_validate_framebuffers(st);
      pipeline_mask = ST_PIPELINE_META_STATE_MASK;
      break;

   case ST_PIPELINE_UPDATE_FRAMEBUFFER:
      st_manager_validate_framebuffers(st);
      pipeline_mask = ST_PIPELINE_UPDATE_FB_STATE_MASK;
      break;

   case ST_PIPELINE_COMPUTE: {
      struct st_compute_program *old_cp = st->cp;
      struct gl_program *new_cp = ctx->ComputeProgram._Current;

      if (new_cp != &old_cp->Base) {
         if (old_cp)
            st->dirty |= old_cp->affected_states;
         assert(new_cp);
         st->dirty |= st_compute_program(new_cp)->affected_states;
      }

      st->compute_shader_may_be_dirty = false;

      /*
       * We add the ST_NEW_FB_STATE bit here as well, because glBindFramebuffer
       * acts as a barrier that breaks feedback loops between the framebuffer
       * and textures bound to the framebuffer, even when those textures are
       * accessed by compute shaders; so we must inform the driver of new
       * framebuffer state.
       */
      pipeline_mask = ST_PIPELINE_COMPUTE_STATE_MASK | ST_NEW_FB_STATE;
      break;
   }

   default:
      unreachable("Invalid pipeline specified");
   }

   dirty = st->dirty & pipeline_mask;
   if (!dirty)
      return;

   dirty_lo = dirty;
   dirty_hi = dirty >> 32;

   /* Update states.
    *
    * Don't use u_bit_scan64, it may be slower on 32-bit.
    */
   while (dirty_lo)
      update_functions[u_bit_scan(&dirty_lo)](st);
   while (dirty_hi)
      update_functions[32 + u_bit_scan(&dirty_hi)](st);

   /* Clear the render or compute state bits. */
   st->dirty &= ~pipeline_mask;
}
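/* The two 32-bit loops at the end of st_validate_state() are equivalent to a
 * single 64-bit scan over the dirty mask; the comment above avoids the 64-bit
 * helper because it can be slower on 32-bit hosts.  A minimal sketch of what
 * such a 64-bit variant could look like (an assumption for illustration, not
 * Mesa's actual u_math.h implementation):
 */
static inline int
u_bit_scan64_sketch(uint64_t *mask)
{
   const int i = __builtin_ctzll(*mask); /* index of the lowest set bit; *mask must be non-zero */
   *mask &= ~(1llu << i);                /* clear it before returning the index */
   return i;
}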
static void
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
                      struct ac_shader_info *info)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_interp_var_at_sample:
      info->ps.needs_sample_positions = true;
      break;
   case nir_intrinsic_load_draw_id:
      info->vs.needs_draw_id = true;
      break;
   case nir_intrinsic_load_instance_id:
      info->vs.needs_instance_id = true;
      break;
   case nir_intrinsic_load_num_work_groups:
      info->cs.uses_grid_size = true;
      break;
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_work_group_id: {
      unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
      while (mask) {
         unsigned i = u_bit_scan(&mask);

         if (instr->intrinsic == nir_intrinsic_load_work_group_id)
            info->cs.uses_block_id[i] = true;
         else
            info->cs.uses_thread_id[i] = true;
      }
      break;
   }
   case nir_intrinsic_load_local_invocation_index:
      info->cs.uses_local_invocation_idx = true;
      break;
   case nir_intrinsic_load_sample_id:
      info->ps.force_persample = true;
      break;
   case nir_intrinsic_load_sample_pos:
      info->ps.force_persample = true;
      break;
   case nir_intrinsic_load_view_index:
      info->needs_multiview_view_index = true;
      break;
   case nir_intrinsic_load_invocation_id:
      info->uses_invocation_id = true;
      break;
   case nir_intrinsic_load_primitive_id:
      info->uses_prim_id = true;
      break;
   case nir_intrinsic_load_push_constant:
      info->loads_push_constants = true;
      break;
   case nir_intrinsic_vulkan_resource_index:
      info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
      break;
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_min:
   case nir_intrinsic_image_atomic_max:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_size: {
      const struct glsl_type *type = instr->variables[0]->var->type;
      if (instr->variables[0]->deref.child)
         type = instr->variables[0]->deref.child->type;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
      if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
          dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
         info->ps.uses_input_attachments = true;
      mark_sampler_desc(instr->variables[0]->var, info);

      if (instr->intrinsic == nir_intrinsic_image_store ||
          instr->intrinsic == nir_intrinsic_image_atomic_add ||
          instr->intrinsic == nir_intrinsic_image_atomic_min ||
          instr->intrinsic == nir_intrinsic_image_atomic_max ||
          instr->intrinsic == nir_intrinsic_image_atomic_and ||
          instr->intrinsic == nir_intrinsic_image_atomic_or ||
          instr->intrinsic == nir_intrinsic_image_atomic_xor ||
          instr->intrinsic == nir_intrinsic_image_atomic_exchange ||
          instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
         if (nir->info.stage == MESA_SHADER_FRAGMENT)
            info->ps.writes_memory = true;
      }
      break;
   }
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      if (nir->info.stage == MESA_SHADER_FRAGMENT)
         info->ps.writes_memory = true;
      break;
   case nir_intrinsic_load_var:
      if (nir->info.stage == MESA_SHADER_VERTEX) {
         nir_deref_var *dvar = instr->variables[0];
         nir_variable *var = dvar->var;

         if (var->data.mode == nir_var_shader_in) {
            unsigned idx = var->data.location;
            uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
            info->vs.input_usage_mask[idx] |= mask;
         }
      }
      break;
   default:
      break;
   }
}