void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
                       const void *data, GLuint bytes, GLuint flags)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(batch, bytes, flags);
   __memcpy(batch->ptr, data, bytes);
   batch->ptr += bytes;
}
void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
                       const void *data, unsigned int bytes)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(batch, bytes);
   memcpy(batch->ptr, data, bytes);
   batch->ptr += bytes;
}
static void
intel_batchbuffer_start_atomic_helper(struct intel_batchbuffer *batch,
                                      int flag, unsigned int size)
{
   assert(!batch->atomic);
   intel_batchbuffer_check_batchbuffer_flag(batch, flag);
   intel_batchbuffer_require_space(batch, size);
   batch->atomic = 1;
}
void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
                       void *data, unsigned int size)
{
   assert((size & 3) == 0);
   intel_batchbuffer_require_space(batch, size);
   assert(batch->ptr);
   memcpy(batch->ptr, data, size);
   batch->ptr += size;
}
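/* For reference: the helper all of the variants above lean on is not shown
 * in these excerpts.  A minimal sketch, assuming intel_batchbuffer_space()
 * reports the bytes left in the current batch; several of the trees quoted
 * here pass an extra flags/ring argument through to the flush decision.
 */
static inline void
intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
                                unsigned int sz)
{
   /* Flush so the caller starts a fresh batch with at least sz bytes free;
    * emitting sz bytes afterwards can then never wrap the buffer.
    */
   if (intel_batchbuffer_space(batch) < sz)
      intel_batchbuffer_flush(batch);
}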
void
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t estimated_max_batch_usage = 1500;
   bool check_aperture_failed_once = false;

   /* Flush the sampler and render caches.  We definitely need to flush the
    * sampler cache so that we get updated contents from the render cache for
    * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
    * docs to flush the cache between reinterpretations of the same surface
    * data with different formats, which blorp does for stencil and depth
    * data.
    */
   brw_emit_mi_flush(brw);

retry:
   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
   intel_batchbuffer_save_state(brw);
   drm_intel_bo *saved_bo = brw->batch.bo;
   uint32_t saved_used = USED_BATCH(brw->batch);
   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;

   switch (brw->gen) {
   case 6:
      gen6_blorp_exec(brw, params);
      break;
   case 7:
      gen7_blorp_exec(brw, params);
      break;
   default:
      /* BLORP is not supported before Gen6. */
      unreachable("not reached");
   }

   /* Make sure we didn't wrap the batch unintentionally, and make sure we
    * reserved enough space that a wrap will never happen.
    */
   assert(brw->batch.bo == saved_bo);
   assert((USED_BATCH(brw->batch) - saved_used) * 4 +
          (saved_state_batch_offset - brw->batch.state_batch_offset) <
          estimated_max_batch_usage);
   /* Shut up compiler warnings on release build */
   (void)saved_bo;
   (void)saved_used;
   (void)saved_state_batch_offset;

   /* Check if the blorp op we just did would make our batch likely to fail to
    * map all the BOs into the GPU at batch exec time later.  If so, flush the
    * batch and try again with nothing else in the batch.
    */
   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
      if (!check_aperture_failed_once) {
         check_aperture_failed_once = true;
         intel_batchbuffer_reset_to_saved(brw);
         intel_batchbuffer_flush(brw);
         goto retry;
      } else {
         int ret = intel_batchbuffer_flush(brw);
         WARN_ONCE(ret == -ENOSPC,
                   "i965: blorp emit exceeded available aperture space\n");
      }
   }

   if (unlikely(brw->always_flush_batch))
      intel_batchbuffer_flush(brw);

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */
   brw->ctx.NewDriverState = ~0ull;
   brw->no_depth_or_stencil = false;
   brw->ib.type = -1;

   /* Flush the sampler cache so any texturing from the destination is
    * coherent.
    */
   brw_emit_mi_flush(brw);
}
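/* WARN_ONCE above is not shown in these excerpts.  A plausible definition,
 * modeled on the open-coded "static bool warned" pattern the older
 * brw_try_draw_prims variants below use (a sketch, not necessarily the
 * exact macro from brw_context.h):
 */
#define WARN_ONCE(cond, fmt...) do {                   \
   if (unlikely(cond)) {                               \
      static bool _warned = false;                     \
      if (!_warned) {                                  \
         fprintf(stderr, "WARNING: ");                 \
         fprintf(stderr, fmt);                         \
         _warned = true;                               \
      }                                                \
   }                                                   \
} while (0)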
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prim,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index)
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLboolean warn = GL_FALSE;
   GLuint i;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */
   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      uint32_t hw_prim;
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);

      hw_prim = brw_set_prim(brw, &prim[i]);

      if (brw->state.dirty.brw) {
         brw_validate_state(brw);

         /* Various fallback checks: */
         if (brw->intel.Fallback)
            goto out;

         /* Check that we can fit our state in with our existing batchbuffer, or
          * flush otherwise.
          */
         if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
                                             brw->state.validated_bo_count)) {
            static GLboolean warned;
            intel_batchbuffer_flush(intel);

            /* Validate the state after we flushed the batch (which would have
             * changed the set of dirty state).  If we still fail to
             * check_aperture, warn of what's happening, but attempt to continue
             * on since it may succeed anyway, and the user would probably rather
             * see a failure and a warning than a fallback.
             */
            brw_validate_state(brw);

            if (!warned &&
                dri_bufmgr_check_aperture_space(brw->state.validated_bos,
                                                brw->state.validated_bo_count)) {
               warn = GL_TRUE;
               warned = GL_TRUE;
            }
         }

         intel->no_batch_wrap = GL_TRUE;
         brw_upload_state(brw);
      }

      if (intel->gen >= 7)
         gen7_emit_prim(brw, &prim[i], hw_prim);
      else
         brw_emit_prim(brw, &prim[i], hw_prim);

      intel->no_batch_wrap = GL_FALSE;

      retval = GL_TRUE;
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);
out:
   brw_state_cache_check_size(brw);

   if (warn)
      fprintf(stderr, "i965: Single primitive emit potentially exceeded "
              "available aperture space\n");

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}
/* Push the state into the sarea and/or texture memory.
 */
static void
i915_emit_state(struct intel_context *intel)
{
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = i915->current;
   int i;
   int ret, count;
   GLuint dirty;
   GET_CURRENT_CONTEXT(ctx);
   BATCH_LOCALS;

   /* We don't hold the lock at this point, so want to make sure that
    * there won't be a buffer wrap between the state emits and the primitive
    * emit header.
    *
    * It might be better to talk about explicit places where
    * scheduling is allowed, rather than assume that it is whenever a
    * batchbuffer fills up.
    *
    * Set the space as LOOP_CLIPRECTS now, since that's what our primitives
    * will be emitted under.
    */
   intel_batchbuffer_require_space(intel->batch,
                                   get_state_size(state) + 8,
                                   LOOP_CLIPRECTS);
   count = 0;
again:
   dirty = get_dirty(state);

   ret = 0;
   if (dirty & I915_UPLOAD_BUFFERS) {
      ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer);
      if (state->depth_region)
         ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer);
   }

   if (dirty & I915_UPLOAD_TEX_ALL) {
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            if (state->tex_buffer[i]) {
               ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]);
            }
         }
   }

   if (ret) {
      if (count == 0) {
         count++;
         intel_batchbuffer_flush(intel->batch);
         goto again;
      } else {
         _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
         assert(0);
      }
   }

   /* work out list of buffers to emit */

   /* Do this here as we may have flushed the batchbuffer above,
    * causing more state to be dirty!
    */
   dirty = get_dirty(state);
   state->emitted |= dirty;
   assert(get_dirty(state) == 0);

   if (INTEL_DEBUG & DEBUG_STATE)
      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);

   if (dirty & I915_UPLOAD_INVARIENT) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
      i915_emit_invarient_state(intel);
   }

   if (dirty & I915_UPLOAD_CTX) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_CTX:\n");
      emit(intel, state->Ctx, sizeof(state->Ctx));
   }

   if (dirty & I915_UPLOAD_BUFFERS) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
      BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, IGNORE_CLIPRECTS);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
      OUT_RELOC(state->draw_region->buffer,
                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                state->draw_region->draw_offset);

      if (state->depth_region) {
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
         OUT_RELOC(state->depth_region->buffer,
                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                   state->depth_region->draw_offset);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
      ADVANCE_BATCH();
   }

   if (dirty & I915_UPLOAD_STIPPLE) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
      emit(intel, state->Stipple, sizeof(state->Stipple));
   }

   if (dirty & I915_UPLOAD_FOG) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_FOG:\n");
      emit(intel, state->Fog, sizeof(state->Fog));
   }

   /* Combine all the dirty texture state into a single command to
    * avoid lockups on I915 hardware.
    */
   if (dirty & I915_UPLOAD_TEX_ALL) {
      int nr = 0;

      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i))
            nr++;

      BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS);
      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            if (state->tex_buffer[i]) {
               OUT_RELOC(state->tex_buffer[i],
                         DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
                         state->tex_offset[i]);
            }
            else if (state == &i915->meta) {
               assert(i == 0);
               OUT_BATCH(0);
            }
            else {
               OUT_BATCH(state->tex_offset[i]);
            }

            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
         }
      ADVANCE_BATCH();

      BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS);
      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
         }
      ADVANCE_BATCH();
   }
/* Copy BitBlt
 */
bool
intelEmitCopyBlit(struct intel_context *intel,
                  GLuint cpp,
                  GLshort src_pitch,
                  drm_intel_bo *src_buffer,
                  GLuint src_offset,
                  uint32_t src_tiling,
                  GLshort dst_pitch,
                  drm_intel_bo *dst_buffer,
                  GLuint dst_offset,
                  uint32_t dst_tiling,
                  GLshort src_x, GLshort src_y,
                  GLshort dst_x, GLshort dst_y,
                  GLshort w, GLshort h,
                  GLenum logic_op)
{
   GLuint CMD, BR13, pass = 0;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   drm_intel_bo *aper_array[3];
   bool dst_y_tiled = dst_tiling == I915_TILING_Y;
   bool src_y_tiled = src_tiling == I915_TILING_Y;
   BATCH_LOCALS;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
         return false;
   }
   if (src_tiling != I915_TILING_NONE) {
      if (src_offset & 4095)
         return false;
   }
   if (dst_y_tiled || src_y_tiled)
      return false;

   /* do space check before going any further */
   do {
      aper_array[0] = intel->batch.bo;
      aper_array[1] = dst_buffer;
      aper_array[2] = src_buffer;

      if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
         intel_batchbuffer_flush(intel);
         pass++;
      } else
         break;
   } while (pass < 2);

   if (pass >= 2)
      return false;

   intel_batchbuffer_require_space(intel, 8 * 4);
   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
    * the low bits.
    */
   if (src_pitch % 4 != 0 || dst_pitch % 4 != 0)
      return false;

   /* For big formats (such as floating point), do the copy using 16 or 32bpp
    * and multiply the coordinates.
    */
   if (cpp > 4) {
      if (cpp % 4 == 2) {
         dst_x *= cpp / 2;
         dst_x2 *= cpp / 2;
         src_x *= cpp / 2;
         cpp = 2;
      } else {
         assert(cpp % 4 == 0);
         dst_x *= cpp / 4;
         dst_x2 *= cpp / 4;
         src_x *= cpp / 4;
         cpp = 4;
      }
   }

   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

   switch (cpp) {
   case 1:
   case 2:
      CMD = XY_SRC_COPY_BLT_CMD;
      break;
   case 4:
      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return false;
   }

   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
      return true;
   }

   assert(dst_x < dst_x2);
   assert(dst_y < dst_y2);

   BEGIN_BATCH(8);
   OUT_BATCH(CMD | (8 - 2));
   OUT_BATCH(BR13 | (uint16_t)dst_pitch);
   OUT_BATCH((dst_y << 16) | dst_x);
   OUT_BATCH((dst_y2 << 16) | dst_x2);
   OUT_RELOC_FENCED(dst_buffer,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    dst_offset);
   OUT_BATCH((src_y << 16) | src_x);
   OUT_BATCH((uint16_t)src_pitch);
   OUT_RELOC_FENCED(src_buffer,
                    I915_GEM_DOMAIN_RENDER, 0,
                    src_offset);
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel);

   return true;
}
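/* br13_for_cpp, used by several of the blit paths here, is not shown in
 * these excerpts.  A sketch of its likely shape: it maps bytes-per-pixel
 * to the BR13 color-depth field.  The BR13_* values correspond to the
 * (1 << 24) / (1 << 24) | (1 << 25) depth bits open-coded in the older
 * variants below; BR13_8 is assumed to be the 8bpp encoding.
 */
static uint32_t
br13_for_cpp(int cpp)
{
   switch (cpp) {
   case 4:
      return BR13_8888;   /* 32bpp ARGB */
   case 2:
      return BR13_565;    /* 16bpp RGB565 */
   case 1:
      return BR13_8;      /* 8bpp */
   default:
      assert(0);
      return 0;
   }
}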
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      brw->draw.gl_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;

      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:

      /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
         brw->no_batch_wrap = true;
         brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
         }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}
bool
intelEmitImmediateColorExpandBlit(struct brw_context *brw,
                                  GLuint cpp,
                                  GLubyte *src_bits, GLuint src_size,
                                  GLuint fg_color,
                                  GLshort dst_pitch,
                                  struct brw_bo *dst_buffer,
                                  GLuint dst_offset,
                                  enum isl_tiling dst_tiling,
                                  GLshort x, GLshort y,
                                  GLshort w, GLshort h,
                                  enum gl_logicop_mode logic_op)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   if (dst_tiling != ISL_TILING_LINEAR) {
      if (dst_offset & 4095)
         return false;
      if (dst_tiling == ISL_TILING_Y0)
         return false;
   }

   assert((unsigned) logic_op <= 0x0f);
   assert(dst_pitch > 0);

   if (w < 0 || h < 0)
      return true;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __func__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   unsigned xy_setup_blt_length = devinfo->gen >= 8 ? 10 : 8;
   intel_batchbuffer_require_space(brw, (xy_setup_blt_length * 4) +
                                        (3 * 4) + dwords * 4);

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
   if (dst_tiling != ISL_TILING_LINEAR) {
      opcode |= XY_DST_TILED;
      dst_pitch /= 4;
   }

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   br13 |= br13_for_cpp(cpp);

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
   if (dst_tiling != ISL_TILING_LINEAR)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
   OUT_BATCH(opcode | (xy_setup_blt_length - 2));
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   if (devinfo->gen >= 8) {
      OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
   } else {
      OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
   }
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */
   if (devinfo->gen >= 8)
      OUT_BATCH(0);

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X));
   OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X));
   ADVANCE_BATCH();

   intel_batchbuffer_data(brw, src_bits, dwords * 4);

   brw_emit_mi_flush(brw);

   return true;
}
/* Copy BitBlt
 */
static bool
emit_copy_blit(struct brw_context *brw,
               GLuint cpp,
               int32_t src_pitch,
               struct brw_bo *src_buffer,
               GLuint src_offset,
               enum isl_tiling src_tiling,
               int32_t dst_pitch,
               struct brw_bo *dst_buffer,
               GLuint dst_offset,
               enum isl_tiling dst_tiling,
               GLshort src_x, GLshort src_y,
               GLshort dst_x, GLshort dst_y,
               GLshort w, GLshort h,
               enum gl_logicop_mode logic_op)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   GLuint CMD, BR13;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   bool dst_y_tiled = dst_tiling == ISL_TILING_Y0;
   bool src_y_tiled = src_tiling == ISL_TILING_Y0;
   uint32_t src_tile_w, src_tile_h;
   uint32_t dst_tile_w, dst_tile_h;

   if ((dst_y_tiled || src_y_tiled) && devinfo->gen < 6)
      return false;

   const unsigned bo_sizes = dst_buffer->size + src_buffer->size;

   /* do space check before going any further */
   if (!brw_batch_has_aperture_space(brw, bo_sizes))
      intel_batchbuffer_flush(brw);

   if (!brw_batch_has_aperture_space(brw, bo_sizes))
      return false;

   unsigned length = devinfo->gen >= 8 ? 10 : 8;

   intel_batchbuffer_require_space(brw, length * 4);
   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __func__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   intel_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h);
   intel_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h);

   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
    * (X direction width of the Tile).  This is ensured while allocating the
    * buffer object.
    */
   assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
   assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);

   /* For big formats (such as floating point), do the copy using 16 or
    * 32bpp and multiply the coordinates.
    */
   if (cpp > 4) {
      if (cpp % 4 == 2) {
         dst_x *= cpp / 2;
         dst_x2 *= cpp / 2;
         src_x *= cpp / 2;
         cpp = 2;
      } else {
         assert(cpp % 4 == 0);
         dst_x *= cpp / 4;
         dst_x2 *= cpp / 4;
         src_x *= cpp / 4;
         cpp = 4;
      }
   }

   if (!alignment_valid(brw, dst_offset, dst_tiling))
      return false;
   if (!alignment_valid(brw, src_offset, src_tiling))
      return false;

   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
    * the low bits.  Offsets must be naturally aligned.
    */
   if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
       dst_pitch % 4 != 0 || dst_offset % cpp != 0)
      return false;

   assert(cpp <= 4);
   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

   CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp);

   /* For tiled source and destination, pitch value should be specified
    * as a number of Dwords.
    */
   if (dst_tiling != ISL_TILING_LINEAR)
      dst_pitch /= 4;
   if (src_tiling != ISL_TILING_LINEAR)
      src_pitch /= 4;

   if (dst_y2 <= dst_y || dst_x2 <= dst_x)
      return true;

   assert(dst_x < dst_x2);
   assert(dst_y < dst_y2);

   BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
   OUT_BATCH(CMD | (length - 2));
   OUT_BATCH(BR13 | (uint16_t)dst_pitch);
   OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X));
   OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X));
   if (devinfo->gen >= 8) {
      OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
   } else {
      OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
   }
   OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X));
   OUT_BATCH((uint16_t)src_pitch);
   if (devinfo->gen >= 8) {
      OUT_RELOC64(src_buffer, 0, src_offset);
   } else {
      OUT_RELOC(src_buffer, 0, src_offset);
   }
   ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);

   brw_emit_mi_flush(brw);

   return true;
}
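/* SET_FIELD, used heavily above, packs a value into a named bitfield.  Its
 * definition is not part of these excerpts; it is plausibly along these
 * lines (an assumption -- the real macro lives alongside per-field
 * _SHIFT/_MASK constants such as BLT_X_SHIFT and BLT_X_MASK):
 */
#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK)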
void
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
                                  GLuint cpp,
                                  GLubyte *src_bits, GLuint src_size,
                                  GLuint fg_color,
                                  GLshort dst_pitch,
                                  struct buffer *dst_buffer,
                                  GLuint dst_offset,
                                  GLboolean dst_tiled,
                                  GLshort x, GLshort y,
                                  GLshort w, GLshort h,
                                  GLenum logic_op)
{
   struct xy_setup_blit setup;
   struct xy_text_immediate_blit text;
   int dwords = ((src_size + 7) & ~7) / 4;

   assert(logic_op - GL_CLEAR >= 0);
   assert(logic_op - GL_CLEAR < 0x10);

   if (w < 0 || h < 0)
      return;

   dst_pitch *= cpp;

   if (dst_tiled)
      dst_pitch /= 4;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __FUNCTION__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   memset(&setup, 0, sizeof(setup));

   setup.br0.client = CLIENT_2D;
   setup.br0.opcode = OPCODE_XY_SETUP_BLT;
   setup.br0.write_alpha = (cpp == 4);
   setup.br0.write_rgb = (cpp == 4);
   setup.br0.dst_tiled = dst_tiled;
   setup.br0.length = (sizeof(setup) / sizeof(int)) - 2;

   setup.br13.dest_pitch = dst_pitch;
   setup.br13.rop = translate_raster_op(logic_op);
   setup.br13.color_depth = (cpp == 4) ? BR13_8888 : BR13_565;
   setup.br13.clipping_enable = 0;
   setup.br13.mono_source_transparency = 1;

   setup.dw2.clip_y1 = 0;
   setup.dw2.clip_x1 = 0;
   setup.dw3.clip_y2 = 100;
   setup.dw3.clip_x2 = 100;

   setup.dest_base_addr = bmBufferOffset(intel, dst_buffer) + dst_offset;
   setup.background_color = 0;
   setup.foreground_color = fg_color;
   setup.pattern_base_addr = 0;

   memset(&text, 0, sizeof(text));
   text.dw0.client = CLIENT_2D;
   text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT;
   text.dw0.pad0 = 0;
   text.dw0.byte_packed = 1;    /* ?maybe? */
   text.dw0.pad1 = 0;
   text.dw0.dst_tiled = dst_tiled;
   text.dw0.pad2 = 0;
   text.dw0.length = (sizeof(text) / sizeof(int)) - 2 + dwords;
   text.dw1.dest_y1 = y;        /* duplicates info in setup blit */
   text.dw1.dest_x1 = x;
   text.dw2.dest_y2 = y + h;
   text.dw2.dest_x2 = x + w;

   /* Reserve the full immediate payload (dwords * 4 bytes), not just
    * dwords bytes, to match what the data emits below actually consume.
    */
   intel_batchbuffer_require_space(intel->batch,
                                   sizeof(setup) + sizeof(text) + dwords * 4,
                                   INTEL_BATCH_NO_CLIPRECTS);

   intel_batchbuffer_data(intel->batch, &setup, sizeof(setup),
                          INTEL_BATCH_NO_CLIPRECTS);
   intel_batchbuffer_data(intel->batch, &text, sizeof(text),
                          INTEL_BATCH_NO_CLIPRECTS);
   intel_batchbuffer_data(intel->batch, src_bits, dwords * 4,
                          INTEL_BATCH_NO_CLIPRECTS);
}
/* Push the state into the sarea and/or texture memory.
 */
static void
i915_emit_state(struct intel_context *intel)
{
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = i915->current;
   int i;
   GLuint dirty;
   BATCH_LOCALS;

   /* We don't hold the lock at this point, so want to make sure that
    * there won't be a buffer wrap.
    *
    * It might be better to talk about explicit places where
    * scheduling is allowed, rather than assume that it is whenever a
    * batchbuffer fills up.
    */
   intel_batchbuffer_require_space(intel->batch, get_state_size(state), 0);

   /* Do this here as we may have flushed the batchbuffer above,
    * causing more state to be dirty!
    */
   dirty = get_dirty(state);

   if (INTEL_DEBUG & DEBUG_STATE)
      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);

   if (dirty & I915_UPLOAD_INVARIENT) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
      i915_emit_invarient_state(intel);
   }

   if (dirty & I915_UPLOAD_CTX) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_CTX:\n");
      emit(intel, state->Ctx, sizeof(state->Ctx));
   }

   if (dirty & I915_UPLOAD_BUFFERS) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
      BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, 0);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
      OUT_RELOC(state->draw_region->buffer,
                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE,
                state->draw_region->draw_offset);

      if (state->depth_region) {
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
         OUT_RELOC(state->depth_region->buffer,
                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                   DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE,
                   state->depth_region->draw_offset);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
      ADVANCE_BATCH();
   }

   if (dirty & I915_UPLOAD_STIPPLE) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
      emit(intel, state->Stipple, sizeof(state->Stipple));
   }

   if (dirty & I915_UPLOAD_FOG) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_FOG:\n");
      emit(intel, state->Fog, sizeof(state->Fog));
   }

   /* Combine all the dirty texture state into a single command to
    * avoid lockups on I915 hardware.
    */
   if (dirty & I915_UPLOAD_TEX_ALL) {
      int nr = 0;

      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i))
            nr++;

      BEGIN_BATCH(2 + nr * 3, 0);
      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            if (state->tex_buffer[i]) {
               OUT_RELOC(state->tex_buffer[i],
                         DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
                         DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
                         state->tex_offset[i]);
            }
            else {
               assert(i == 0);
               assert(state == &i915->meta);
               OUT_BATCH(0);
            }

            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
         }
      ADVANCE_BATCH();

      BEGIN_BATCH(2 + nr * 3, 0);
      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
         }
      ADVANCE_BATCH();
   }
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_state() because it
    * may flush the batchbuffer for a blit, affecting the state flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances) {
         brw->num_instances = prims[i].num_instances;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }
      if (brw->basevertex != prims[i].basevertex) {
         brw->basevertex = prims[i].basevertex;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
         brw->no_batch_wrap = true;
         brw_upload_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            if (intel_batchbuffer_flush(brw) == -ENOSPC) {
               static bool warned = false;

               if (!warned) {
                  fprintf(stderr, "i965: Single primitive emit exceeded "
                          "available aperture space\n");
                  warned = true;
               }

               retval = false;
            }
         }
      }
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
/* Copy BitBlt
 */
void
intelEmitCopyBlit(struct intel_context *intel,
                  GLuint cpp,
                  GLshort src_pitch,
                  dri_bo *src_buffer,
                  GLuint src_offset,
                  GLboolean src_tiled,
                  GLshort dst_pitch,
                  dri_bo *dst_buffer,
                  GLuint dst_offset,
                  GLboolean dst_tiled,
                  GLshort src_x, GLshort src_y,
                  GLshort dst_x, GLshort dst_y,
                  GLshort w, GLshort h,
                  GLenum logic_op)
{
   GLuint CMD, BR13;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   int ret;
   BATCH_LOCALS;

   /* do space/cliprects check before going any further */
   intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
again:
   ret = dri_bufmgr_check_aperture_space(dst_buffer);
   ret |= dri_bufmgr_check_aperture_space(src_buffer);
   if (ret) {
      intel_batchbuffer_flush(intel->batch);
      goto again;
   }

   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   src_pitch *= cpp;
   dst_pitch *= cpp;

   BR13 = translate_raster_op(logic_op) << 16;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 |= (1 << 24);
      CMD = XY_SRC_COPY_BLT_CMD;
      break;
   case 4:
      BR13 |= (1 << 24) | (1 << 25);
      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return;
   }

#ifndef I915
   if (dst_tiled) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }
   if (src_tiled) {
      CMD |= XY_SRC_TILED;
      src_pitch /= 4;
   }
#endif

   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
      return;
   }

   /* Initial y values don't seem to work with negative pitches.  If
    * we adjust the offsets manually (below), it seems to work fine.
    *
    * On the other hand, if we always adjust, the hardware doesn't
    * know which blit directions to use, so overlapping copypixels get
    * the wrong result.
    */
   if (dst_pitch > 0 && src_pitch > 0) {
      assert(dst_x < dst_x2);
      assert(dst_y < dst_y2);

      BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
      OUT_BATCH(CMD);
      OUT_BATCH(BR13 | dst_pitch);
      OUT_BATCH((dst_y << 16) | dst_x);
      OUT_BATCH((dst_y2 << 16) | dst_x2);
      OUT_RELOC(dst_buffer,
                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                dst_offset);
      OUT_BATCH((src_y << 16) | src_x);
      OUT_BATCH(src_pitch);
      OUT_RELOC(src_buffer,
                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
                src_offset);
      ADVANCE_BATCH();
   } else {
      assert(dst_x < dst_x2);
      assert(h > 0);

      BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
      OUT_BATCH(CMD);
      OUT_BATCH(BR13 | ((uint16_t)dst_pitch));
      OUT_BATCH((0 << 16) | dst_x);
      OUT_BATCH((h << 16) | dst_x2);
      OUT_RELOC(dst_buffer,
                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                dst_offset + dst_y * dst_pitch);
      OUT_BATCH((0 << 16) | src_x);
      OUT_BATCH(src_pitch);
      OUT_RELOC(src_buffer,
                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
                src_offset + src_y * src_pitch);
      ADVANCE_BATCH();
   }
}
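/* translate_raster_op, called by all of the blit paths here, converts a GL
 * logic op into the blitter's 8-bit ROP code.  A sketch consistent with the
 * hard-coded 0xCC (GL_COPY) ROP seen in intelCopyBuffer below; the table
 * follows the classic X11 raster-op encoding, and the default case is an
 * assumption:
 */
static GLuint
translate_raster_op(GLenum logicop)
{
   switch (logicop) {
   case GL_CLEAR:         return 0x00;
   case GL_NOR:           return 0x11;
   case GL_AND_INVERTED:  return 0x22;
   case GL_COPY_INVERTED: return 0x33;
   case GL_AND_REVERSE:   return 0x44;
   case GL_INVERT:        return 0x55;
   case GL_XOR:           return 0x66;
   case GL_NAND:          return 0x77;
   case GL_AND:           return 0x88;
   case GL_EQUIV:         return 0x99;
   case GL_NOOP:          return 0xAA;
   case GL_OR_INVERTED:   return 0xBB;
   case GL_COPY:          return 0xCC;
   case GL_OR_REVERSE:    return 0xDD;
   case GL_OR:            return 0xEE;
   case GL_SET:           return 0xFF;
   default:               return 0xCC; /* assumed fallback: plain copy */
   }
}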
/* Push the state into the sarea and/or texture memory.
 */
static void
i915_emit_state(struct intel_context *intel)
{
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = &i915->state;
   int i, count, aper_count;
   GLuint dirty;
   drm_intel_bo *aper_array[3 + I915_TEX_UNITS];
   GET_CURRENT_CONTEXT(ctx);
   BATCH_LOCALS;

   /* We don't hold the lock at this point, so want to make sure that
    * there won't be a buffer wrap between the state emits and the primitive
    * emit header.
    *
    * It might be better to talk about explicit places where
    * scheduling is allowed, rather than assume that it is whenever a
    * batchbuffer fills up.
    */
   intel_batchbuffer_require_space(intel,
                                   get_state_size(state) +
                                   INTEL_PRIM_EMIT_SIZE);

   count = 0;
again:
   if (intel->batch.bo == NULL) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
      assert(0);
   }
   aper_count = 0;
   dirty = get_dirty(state);

   aper_array[aper_count++] = intel->batch.bo;
   if (dirty & I915_UPLOAD_BUFFERS) {
      if (state->draw_region)
         aper_array[aper_count++] = state->draw_region->bo;
      if (state->depth_region)
         aper_array[aper_count++] = state->depth_region->bo;
   }

   if (dirty & I915_UPLOAD_TEX_ALL) {
      for (i = 0; i < I915_TEX_UNITS; i++) {
         if (dirty & I915_UPLOAD_TEX(i)) {
            if (state->tex_buffer[i]) {
               aper_array[aper_count++] = state->tex_buffer[i];
            }
         }
      }
   }

   if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
      if (count == 0) {
         count++;
         intel_batchbuffer_flush(intel);
         goto again;
      } else {
         _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
         assert(0);
      }
   }

   /* work out list of buffers to emit */

   /* Do this here as we may have flushed the batchbuffer above,
    * causing more state to be dirty!
    */
   dirty = get_dirty(state);
   state->emitted |= dirty;
   assert(get_dirty(state) == 0);

   if (INTEL_DEBUG & DEBUG_STATE)
      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);

   if (dirty & I915_UPLOAD_INVARIENT) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
      i915_emit_invarient_state(intel);
   }

   if (dirty & I915_UPLOAD_RASTER_RULES) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n");
      emit(intel, state->RasterRules, sizeof(state->RasterRules));
   }

   if (dirty & I915_UPLOAD_CTX) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_CTX:\n");
      emit(intel, state->Ctx, sizeof(state->Ctx));
   }

   if (dirty & I915_UPLOAD_BLEND) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BLEND:\n");
      emit(intel, state->Blend, sizeof(state->Blend));
   }

   if (dirty & I915_UPLOAD_BUFFERS) {
      GLuint count;

      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");

      count = 17;
      if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
         count++;

      BEGIN_BATCH(count);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
      if (state->draw_region) {
         OUT_RELOC(state->draw_region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
      } else {
         OUT_BATCH(0);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
      if (state->depth_region) {
         OUT_RELOC(state->depth_region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
      } else {
         OUT_BATCH(0);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);

      if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
         OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);

      ADVANCE_BATCH();
   }

   if (dirty & I915_UPLOAD_STIPPLE) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
      emit(intel, state->Stipple, sizeof(state->Stipple));
   }

   /* Combine all the dirty texture state into a single command to
    * avoid lockups on I915 hardware.
    */
   if (dirty & I915_UPLOAD_TEX_ALL) {
      int nr = 0;
      GLuint unwind;

      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i))
            nr++;

      BEGIN_BATCH(2 + nr * 3);
      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            OUT_RELOC(state->tex_buffer[i],
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      state->tex_offset[i]);

            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
         }
      ADVANCE_BATCH();

      unwind = intel->batch.used;
      BEGIN_BATCH(2 + nr * 3);
      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
         }
      ADVANCE_BATCH();
      if (i915->last_sampler &&
          memcmp(intel->batch.map + i915->last_sampler,
                 intel->batch.map + unwind,
                 (2 + nr * 3) * sizeof(int)) == 0)
         intel->batch.used = unwind;
      else
         i915->last_sampler = unwind;
   }
/**
 * Copy the back color buffer to the front color buffer.
 * Used for SwapBuffers().
 */
void
intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
                const drm_clip_rect_t * rect)
{
   struct intel_context *intel;
   const intelScreenPrivate *intelScreen;
   int ret;

   DBG("%s\n", __FUNCTION__);

   assert(dPriv);

   intel = intelScreenContext(dPriv->driScreenPriv->private);
   if (!intel)
      return;

   intelScreen = intel->intelScreen;

   if (intel->last_swap_fence) {
      dri_fence_wait(intel->last_swap_fence);
      dri_fence_unreference(intel->last_swap_fence);
      intel->last_swap_fence = NULL;
   }
   intel->last_swap_fence = intel->first_swap_fence;
   intel->first_swap_fence = NULL;

   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
    * should work regardless.
    */
   LOCK_HARDWARE(intel);

   if (dPriv && dPriv->numClipRects) {
      struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
      struct intel_region *src, *dst;
      int nbox = dPriv->numClipRects;
      drm_clip_rect_t *pbox = dPriv->pClipRects;
      int cpp;
      int src_pitch, dst_pitch;
      unsigned short src_x, src_y;
      int BR13, CMD;
      int i;

      src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
      dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);

      src_pitch = src->pitch * src->cpp;
      dst_pitch = dst->pitch * dst->cpp;

      cpp = src->cpp;

      ASSERT(intel_fb);
      ASSERT(intel_fb->Base.Name == 0);    /* Not a user-created FBO */
      ASSERT(src);
      ASSERT(dst);
      ASSERT(src->cpp == dst->cpp);

      if (cpp == 2) {
         BR13 = (0xCC << 16) | (1 << 24);
         CMD = XY_SRC_COPY_BLT_CMD;
      }
      else {
         BR13 = (0xCC << 16) | (1 << 24) | (1 << 25);
         CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      }

#ifndef I915
      if (src->tiled) {
         CMD |= XY_SRC_TILED;
         src_pitch /= 4;
      }
      if (dst->tiled) {
         CMD |= XY_DST_TILED;
         dst_pitch /= 4;
      }
#endif
      /* do space/cliprects check before going any further */
      intel_batchbuffer_require_space(intel->batch, 8 * 4,
                                      REFERENCES_CLIPRECTS);
   again:
      ret = dri_bufmgr_check_aperture_space(dst->buffer);
      ret |= dri_bufmgr_check_aperture_space(src->buffer);

      if (ret) {
         intel_batchbuffer_flush(intel->batch);
         goto again;
      }

      for (i = 0; i < nbox; i++, pbox++) {
         drm_clip_rect_t box = *pbox;

         if (rect) {
            if (!intel_intersect_cliprects(&box, &box, rect))
               continue;
         }

         if (box.x1 >= box.x2 || box.y1 >= box.y2)
            continue;

         assert(box.x1 < box.x2);
         assert(box.y1 < box.y2);

         src_x = box.x1 - dPriv->x + dPriv->backX;
         src_y = box.y1 - dPriv->y + dPriv->backY;

         BEGIN_BATCH(8, REFERENCES_CLIPRECTS);
         OUT_BATCH(CMD);
         OUT_BATCH(BR13 | dst_pitch);
         OUT_BATCH((box.y1 << 16) | box.x1);
         OUT_BATCH((box.y2 << 16) | box.x2);
         OUT_RELOC(dst->buffer,
                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
         OUT_BATCH((src_y << 16) | src_x);
         OUT_BATCH(src_pitch);
         OUT_RELOC(src->buffer,
                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
         ADVANCE_BATCH();
      }

      if (intel->first_swap_fence)
         dri_fence_unreference(intel->first_swap_fence);
      intel_batchbuffer_flush(intel->batch);
      intel->first_swap_fence = intel->batch->last_fence;
      if (intel->first_swap_fence)
         dri_fence_reference(intel->first_swap_fence);
   }

   UNLOCK_HARDWARE(intel);
}
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prim,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index)
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   /* Resolves must occur after updating state and finalizing textures but
    * before setting up any hardware state for this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */
   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
      intel_batchbuffer_save_state(intel);

      if (intel->gen < 6)
         brw_set_prim(brw, &prim[i]);
      else
         gen6_set_prim(brw, &prim[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
         intel->no_batch_wrap = true;
         brw_upload_state(brw);

         if (unlikely(brw->intel.Fallback)) {
            intel->no_batch_wrap = false;
            retval = false;
            goto out;
         }
      }

      if (intel->gen >= 7)
         gen7_emit_prim(brw, &prim[i], brw->primitive);
      else
         brw_emit_prim(brw, &prim[i], brw->primitive);

      intel->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(intel);
            intel_batchbuffer_flush(intel);
            fail_next = true;
            goto retry;
         } else {
            if (intel_batchbuffer_flush(intel) == -ENOSPC) {
               static bool warned = false;

               if (!warned) {
                  fprintf(stderr, "i965: Single primitive emit exceeded "
                          "available aperture space\n");
                  warned = true;
               }

               retval = false;
            }
         }
      }

      if (!_mesa_meta_in_progress(ctx))
         brw_update_primitive_count(brw, &prim[i]);
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);
out:
   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
void
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
                                  GLuint cpp,
                                  GLubyte *src_bits, GLuint src_size,
                                  GLuint fg_color,
                                  GLshort dst_pitch,
                                  dri_bo *dst_buffer,
                                  GLuint dst_offset,
                                  GLboolean dst_tiled,
                                  GLshort x, GLshort y,
                                  GLshort w, GLshort h,
                                  GLenum logic_op)
{
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   assert(logic_op - GL_CLEAR >= 0);
   assert(logic_op - GL_CLEAR < 0x10);

   if (w < 0 || h < 0)
      return;

   dst_pitch *= cpp;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __FUNCTION__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   /* Reserve the full immediate payload (dwords * 4 bytes), matching the
    * intel_batchbuffer_data() call at the end.
    */
   intel_batchbuffer_require_space(intel->batch,
                                   (8 * 4) + (3 * 4) + dwords * 4,
                                   NO_LOOP_CLIPRECTS);

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
#ifndef I915
   if (dst_tiled) {
      opcode |= XY_DST_TILED;
      dst_pitch /= 4;
   }
#endif

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   if (cpp == 2)
      br13 |= BR13_565;
   else
      br13 |= BR13_8888;

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
   if (dst_tiled)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS);
   OUT_BATCH(opcode);
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   OUT_RELOC(dst_buffer,
             DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
             dst_offset);
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   ADVANCE_BATCH();

   intel_batchbuffer_data(intel->batch, src_bits, dwords * 4,
                          NO_LOOP_CLIPRECTS);
}
bool
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
                                  GLuint cpp,
                                  GLubyte *src_bits, GLuint src_size,
                                  GLuint fg_color,
                                  GLshort dst_pitch,
                                  drm_intel_bo *dst_buffer,
                                  GLuint dst_offset,
                                  uint32_t dst_tiling,
                                  GLshort x, GLshort y,
                                  GLshort w, GLshort h,
                                  GLenum logic_op)
{
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
         return false;
      if (dst_tiling == I915_TILING_Y)
         return false;
   }

   assert((logic_op >= GL_CLEAR) && (logic_op <= (GL_CLEAR + 0x0f)));
   assert(dst_pitch > 0);

   if (w < 0 || h < 0)
      return true;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __FUNCTION__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   intel_batchbuffer_require_space(intel,
                                   (8 * 4) + (3 * 4) + dwords * 4);

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   br13 |= br13_for_cpp(cpp);

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
   if (dst_tiling != I915_TILING_NONE)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH(8 + 3);
   OUT_BATCH(opcode | (8 - 2));
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   OUT_RELOC_FENCED(dst_buffer,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    dst_offset);
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   ADVANCE_BATCH();

   intel_batchbuffer_data(intel, src_bits, dwords * 4);

   intel_batchbuffer_emit_mi_flush(intel);

   return true;
}
/* Copy BitBlt
 */
GLboolean
intelEmitCopyBlit(struct intel_context *intel,
                  GLuint cpp,
                  GLshort src_pitch,
                  drm_intel_bo *src_buffer,
                  GLuint src_offset,
                  uint32_t src_tiling,
                  GLshort dst_pitch,
                  drm_intel_bo *dst_buffer,
                  GLuint dst_offset,
                  uint32_t dst_tiling,
                  GLshort src_x, GLshort src_y,
                  GLshort dst_x, GLshort dst_y,
                  GLshort w, GLshort h,
                  GLenum logic_op)
{
   GLuint CMD, BR13, pass = 0;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   drm_intel_bo *aper_array[3];
   BATCH_LOCALS;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
         return GL_FALSE;
      if (dst_tiling == I915_TILING_Y)
         return GL_FALSE;
   }
   if (src_tiling != I915_TILING_NONE) {
      if (src_offset & 4095)
         return GL_FALSE;
      if (src_tiling == I915_TILING_Y)
         return GL_FALSE;
   }

   /* do space check before going any further */
   do {
      aper_array[0] = intel->batch.bo;
      aper_array[1] = dst_buffer;
      aper_array[2] = src_buffer;

      if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
         intel_batchbuffer_flush(intel);
         pass++;
      } else
         break;
   } while (pass < 2);

   if (pass >= 2)
      return GL_FALSE;

   intel_batchbuffer_require_space(intel, 8 * 4, true);
   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   src_pitch *= cpp;
   dst_pitch *= cpp;

   /* For big formats (such as floating point), do the copy using 32bpp and
    * multiply the coordinates.
    */
   if (cpp > 4) {
      assert(cpp % 4 == 0);
      dst_x *= cpp / 4;
      dst_x2 *= cpp / 4;
      src_x *= cpp / 4;
      cpp = 4;
   }

   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;

   switch (cpp) {
   case 1:
   case 2:
      CMD = XY_SRC_COPY_BLT_CMD;
      break;
   case 4:
      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return GL_FALSE;
   }

#ifndef I915
   if (dst_tiling != I915_TILING_NONE) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }
   if (src_tiling != I915_TILING_NONE) {
      CMD |= XY_SRC_TILED;
      src_pitch /= 4;
   }
#endif

   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
      return GL_TRUE;
   }

   assert(dst_x < dst_x2);
   assert(dst_y < dst_y2);

   BEGIN_BATCH_BLT(8);
   OUT_BATCH(CMD);
   OUT_BATCH(BR13 | (uint16_t)dst_pitch);
   OUT_BATCH((dst_y << 16) | dst_x);
   OUT_BATCH((dst_y2 << 16) | dst_x2);
   OUT_RELOC_FENCED(dst_buffer,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    dst_offset);
   OUT_BATCH((src_y << 16) | src_x);
   OUT_BATCH((uint16_t)src_pitch);
   OUT_RELOC_FENCED(src_buffer,
                    I915_GEM_DOMAIN_RENDER, 0,
                    src_offset);
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel);

   return GL_TRUE;
}
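/* Example call for the variant above (hypothetical caller): copy a 64x64
 * region of a 32bpp linear surface between two BOs with GL_COPY.  The names
 * src_bo and dst_bo and the pitch values are illustrative only; note that
 * this variant takes the pitch in pixels and multiplies by cpp internally.
 */
static void
copy_tile_example(struct intel_context *intel,
                  drm_intel_bo *src_bo, drm_intel_bo *dst_bo)
{
   GLboolean ok = intelEmitCopyBlit(intel,
                                    4,                 /* cpp: 32bpp */
                                    1024,              /* src pitch (pixels) */
                                    src_bo, 0,         /* src buffer, offset */
                                    I915_TILING_NONE,
                                    1024,              /* dst pitch (pixels) */
                                    dst_bo, 0,         /* dst buffer, offset */
                                    I915_TILING_NONE,
                                    0, 0,              /* src x, y */
                                    0, 0,              /* dst x, y */
                                    64, 64,            /* width, height */
                                    GL_COPY);
   if (!ok) {
      /* Blit could not be emitted (e.g. Y tiling or aperture pressure);
       * a real caller would fall back to a mapped or software copy here.
       */
   }
}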