/**
 * Map one teximage of a mipmap tree for CPU access.
 *
 * \param intel          context handed to intel_region_map()
 * \param mt             mipmap tree that holds the image
 * \param face           face index used for the image-offset lookup
 * \param level          mipmap level to map
 * \param row_stride     if non-NULL, receives the row stride
 *                       (region pitch * cpp)
 * \param image_offsets  for GL_TEXTURE_3D: receives one offset
 *                       (x + y * pitch) per depth slice of the level,
 *                       relative to the returned pointer; for every other
 *                       target only entry 0 is written, and it is set to 0
 * \return for GL_TEXTURE_3D the address of the mapped region; otherwise the
 *         mapped region address already advanced to the requested image
 */
GLubyte *
intel_miptree_image_map(struct intel_context * intel,
                        struct intel_mipmap_tree * mt,
                        GLuint face,
                        GLuint level,
                        GLuint * row_stride, GLuint * image_offsets)
{
   GLuint img_x, img_y;
   int slice;

   DBG("%s \n", __FUNCTION__);

   if (row_stride)
      *row_stride = mt->region->pitch * mt->cpp;

   if (mt->target != GL_TEXTURE_3D) {
      /* Single image: fold its position into the returned pointer and
       * report a zero offset to the caller.
       */
      assert(mt->level[level].depth == 1);
      intel_miptree_get_image_offset(mt, level, face, 0, &img_x, &img_y);
      image_offsets[0] = 0;

      return intel_region_map(intel, mt->region) +
         (img_x + img_y * mt->region->pitch) * mt->cpp;
   }

   /* 3D texture: the caller gets the region base plus a table with the
    * offset of every depth slice.
    */
   for (slice = 0; slice < mt->level[level].depth; slice++) {
      intel_miptree_get_image_offset(mt, level, face, slice, &img_x, &img_y);
      image_offsets[slice] = img_x + img_y * mt->region->pitch;
   }

   return intel_region_map(intel, mt->region);
}
/**
 * Map a texture image's miptree storage and fill in the swrast fields
 * (Map, RowStride and, for layered targets, ImageOffsets[]).
 *
 * Does nothing when \p intel_image is NULL or has no miptree.
 *
 * \param intel        context passed through to the miptree helpers
 * \param intel_image  texture image whose base.Map / base.RowStride /
 *                     base.ImageOffsets are written
 * \param mode         bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT
 *                     (not referenced by this function)
 */
static void
intel_tex_map_image_for_swrast(struct intel_context *intel,
                               struct intel_texture_image *intel_image,
                               GLbitfield mode)
{
   unsigned int img_x, img_y;

   if (!intel_image || !intel_image->mt)
      return;

   struct intel_mipmap_tree *mt = intel_image->mt;
   const int level = intel_image->base.Base.Level;
   const int face = intel_image->base.Base.Face;

   /* Resolve the depth of every slice of this level before mapping. */
   for (int layer = 0; layer < mt->level[level].depth; layer++)
      intel_miptree_slice_resolve_depth(intel, mt, level, layer);

   if (mt->target != GL_TEXTURE_3D &&
       mt->target != GL_TEXTURE_2D_ARRAY &&
       mt->target != GL_TEXTURE_1D_ARRAY) {
      /* Single-image case: the map pointer is advanced straight to the
       * image, so no ImageOffsets[] entry is needed.
       */
      assert(intel_image->base.Base.Depth == 1);
      intel_miptree_get_image_offset(mt, level, face, &img_x, &img_y);

      DBG("%s: (%d,%d) -> (%d, %d)/%d\n",
          __FUNCTION__, face, level, img_x, img_y, mt->region->pitch);

      intel_image->base.Map = intel_miptree_map_raw(intel, mt) +
         img_x * mt->cpp + img_y * mt->region->pitch;
   } else {
      /* ImageOffsets[] is only used for swrast's fetch_texel_3d, so this
       * path cannot share code with the single-image one.
       */
      for (int layer = 0; layer < mt->level[level].depth; layer++) {
         intel_miptree_get_image_offset(mt, level, layer, &img_x, &img_y);
         intel_image->base.ImageOffsets[layer] =
            img_x + img_y * (mt->region->pitch / mt->region->cpp);
      }

      DBG("%s \n", __FUNCTION__);

      intel_image->base.Map = intel_miptree_map_raw(intel, mt);
   }

   /* RowStride is the pitch divided by cpp; the pitch must divide evenly. */
   assert(mt->region->pitch % mt->region->cpp == 0);
   intel_image->base.RowStride = mt->region->pitch / mt->region->cpp;
}
/**
 * Copy one mipmap image (all of its depth slices) from \p src to \p dst.
 *
 * Each slice is first copied with intel_region_copy(); when that reports
 * failure the slice is copied on the CPU through mapped regions instead.
 *
 * \param intel  context used for the copy and for mapping
 * \param dst    destination mipmap tree
 * \param face   face index of the image in both trees
 * \param level  mipmap level of the image in both trees
 * \param src    source mipmap tree; its level info supplies the
 *               width/height/depth of the copy
 */
void
intel_miptree_image_copy(struct intel_context *intel,
                         struct intel_mipmap_tree *dst,
                         GLuint face, GLuint level,
                         struct intel_mipmap_tree *src)
{
   const GLuint num_slices = src->level[level].depth;
   GLuint copy_w = src->level[level].width;
   GLuint copy_h = src->level[level].height;
   GLuint slice;

   if (dst->compressed) {
      GLuint align_w, align_h;

      intel_get_texture_alignment_unit(dst->internal_format,
                                       &align_w, &align_h);
      /* NOTE(review): align_h is fetched but the row count is divided by a
       * literal 4 -- confirm that every supported compressed format really
       * has 4-row blocks.
       */
      copy_h = (copy_h + 3) / 4;
      copy_w = ALIGN(copy_w, align_w);
   }

   intel_prepare_render(intel);

   for (slice = 0; slice < num_slices; slice++) {
      GLuint from_x, from_y, to_x, to_y;
      GLboolean blitted;

      intel_miptree_get_image_offset(src, level, face, slice,
                                     &from_x, &from_y);
      intel_miptree_get_image_offset(dst, level, face, slice,
                                     &to_x, &to_y);

      blitted = intel_region_copy(intel,
                                  dst->region, 0, to_x, to_y,
                                  src->region, 0, from_x, from_y,
                                  copy_w, copy_h,
                                  GL_FALSE,
                                  GL_COPY);
      if (blitted)
         continue;

      /* Region copy failed: map both regions and copy the rectangle with
       * _mesa_copy_rect().
       */
      {
         GLubyte *from_map = intel_region_map(intel, src->region);
         GLubyte *to_map = intel_region_map(intel, dst->region);

         _mesa_copy_rect(to_map,
                         dst->cpp,
                         dst->region->pitch,
                         to_x, to_y, copy_w, copy_h,
                         from_map, src->region->pitch,
                         from_x, from_y);

         intel_region_unmap(intel, src->region);
         intel_region_unmap(intel, dst->region);
      }
   }
}
static void intel_miptree_copy_slice(struct intel_context *intel, struct intel_mipmap_tree *dst_mt, struct intel_mipmap_tree *src_mt, int level, int face, int depth) { mesa_format format = src_mt->format; uint32_t width = src_mt->level[level].width; uint32_t height = src_mt->level[level].height; int slice; if (face > 0) slice = face; else slice = depth; assert(depth < src_mt->level[level].depth); assert(src_mt->format == dst_mt->format); if (dst_mt->compressed) { height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h; width = ALIGN(width, dst_mt->align_w); } uint32_t dst_x, dst_y, src_x, src_y; intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y); intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y); DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n", _mesa_get_format_name(src_mt->format), src_mt, src_x, src_y, src_mt->region->pitch, _mesa_get_format_name(dst_mt->format), dst_mt, dst_x, dst_y, dst_mt->region->pitch, width, height); if (!intel_miptree_blit(intel, src_mt, level, slice, 0, 0, false, dst_mt, level, slice, 0, 0, false, width, height, GL_COPY)) { perf_debug("miptree validate blit for %s failed\n", _mesa_get_format_name(format)); intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice, width, height); } }
/**
 * Upload client data into every depth slice of one miptree image.
 *
 * \param intel            context passed to intel_region_data()
 * \param dst              destination mipmap tree
 * \param face             face index of the image
 * \param level            mipmap level of the image
 * \param src              start of the source data; advanced by
 *                         src_image_pitch * dst->cpp bytes per slice
 * \param src_row_pitch    source row pitch handed to intel_region_data()
 * \param src_image_pitch  distance between consecutive source slices,
 *                         multiplied by dst->cpp to get bytes
 */
void
intel_miptree_image_data(struct intel_context *intel,
                         struct intel_mipmap_tree *dst,
                         GLuint face,
                         GLuint level,
                         void *src,
                         GLuint src_row_pitch,
                         GLuint src_image_pitch)
{
   const GLuint num_slices = dst->level[level].depth;
   GLuint slice = 0;

   DBG("%s: %d/%d\n", __FUNCTION__, face, level);

   while (slice < num_slices) {
      GLuint to_x, to_y;
      GLuint rows;

      intel_miptree_get_image_offset(dst, level, face, slice, &to_x, &to_y);

      /* Compressed images are uploaded as rows of 4-line blocks. */
      rows = dst->level[level].height;
      if (dst->compressed)
         rows = (rows + 3) / 4;

      intel_region_data(intel,
                        dst->region, 0, to_x, to_y,
                        src,
                        src_row_pitch,
                        0, 0,                             /* source x, y */
                        dst->level[level].width, rows);   /* width, height */

      /* Step to the next source slice. */
      src = (char *)src + src_image_pitch * dst->cpp;
      slice++;
   }
}
/**
 * Point a __DRIimage at one level/layer of a miptree's shared region.
 *
 * Fills in the image's width, height, intra-tile x/y, aligned base offset
 * and takes a reference on the miptree's region.
 *
 * \param intel    context (not referenced by this function)
 * \param image    DRI image to fill in
 * \param mt       miptree that owns the region
 * \param level    mipmap level to expose
 * \param zoffset  layer within the level
 */
static void
intel_setup_image_from_mipmap_tree(struct intel_context *intel,
                                   __DRIimage *image,
                                   struct intel_mipmap_tree *mt,
                                   GLuint level, GLuint zoffset)
{
   uint32_t tile_mask_x, tile_mask_y;
   unsigned int img_x, img_y;

   intel_miptree_check_level_layer(mt, level, zoffset);

   intel_region_get_tile_masks(mt->region, &tile_mask_x, &tile_mask_y, false);
   intel_miptree_get_image_offset(mt, level, zoffset, &img_x, &img_y);

   /* Dimensions come straight from the level description. */
   image->width = mt->level[level].width;
   image->height = mt->level[level].height;

   /* Masked bits are the position inside a tile; the remaining bits select
    * the base offset returned by intel_region_get_aligned_offset().
    */
   image->tile_x = img_x & tile_mask_x;
   image->tile_y = img_y & tile_mask_y;
   image->offset = intel_region_get_aligned_offset(mt->region,
                                                   img_x & ~tile_mask_x,
                                                   img_y & ~tile_mask_y,
                                                   false);

   intel_region_reference(&image->region, mt->region);
}
/**
 * Shift a blit's destination rectangle by the offset of a miptree image.
 *
 * When stencil is mapped as a Y-tiled render target, the mip-level offsets
 * calculated for Y-tiling do not always match the offsets in W-tiling, so
 * the sampling engine cannot address individual mip levels; the program has
 * to do it itself.  Translating the blit rectangle here achieves that.
 *
 * \param mt     miptree containing the image
 * \param level  mipmap level of the image
 * \param layer  layer of the image
 * \param dims   blit dimensions; dst_x0/dst_x1/dst_y0/dst_y1 are updated
 */
static void
adjust_mip_level(const struct intel_mipmap_tree *mt,
                 unsigned level, unsigned layer, struct blit_dims *dims)
{
   unsigned shift_x, shift_y;

   intel_miptree_get_image_offset(mt, level, layer, &shift_x, &shift_y);

   /* Horizontal edges. */
   dims->dst_x0 += shift_x;
   dims->dst_x1 += shift_x;

   /* Vertical edges. */
   dims->dst_y0 += shift_y;
   dims->dst_y1 += shift_y;
}
void brw_blorp_mip_info::set(struct intel_mipmap_tree *mt, unsigned int level, unsigned int layer) { intel_miptree_check_level_layer(mt, level, layer); this->mt = mt; this->level = level; this->layer = layer; this->width = mt->level[level].width; this->height = mt->level[level].height; intel_miptree_get_image_offset(mt, level, layer, &x_offset, &y_offset); }
/**
 * Refresh irb->draw_x / irb->draw_y with the offset of the renderbuffer's
 * 2D image (irb->mt_level, irb->mt_layer) within its miptree.
 */
void
intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb)
{
   unsigned int img_x;
   unsigned int img_y;

   intel_miptree_get_image_offset(irb->mt,
                                  irb->mt_level,
                                  irb->mt_layer,
                                  &img_x, &img_y);

   irb->draw_y = img_y;
   irb->draw_x = img_x;
}
/**
 * Split the position of a miptree image into an aligned base offset plus an
 * x/y offset from that base.
 *
 * Rendering with tiled buffers requires the base address of the buffer to
 * be aligned to a page boundary, yet a renderbuffer (and sometimes a
 * texture) may need to point at an image level that is not.
 *
 * \param mt      miptree containing the image
 * \param level   mipmap level of the image
 * \param slice   slice of the image
 * \param tile_x  receives the x offset remaining after alignment
 * \param tile_y  receives the y offset remaining after alignment
 * \return the aligned base offset from intel_region_get_aligned_offset()
 */
uint32_t
intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   struct intel_region *rgn = mt->region;
   uint32_t tile_mask_x, tile_mask_y;
   uint32_t img_x, img_y;

   intel_region_get_tile_masks(rgn, &tile_mask_x, &tile_mask_y, false);
   intel_miptree_get_image_offset(mt, level, slice, &img_x, &img_y);

   /* Bits covered by the mask stay behind as the intra-tile offset... */
   *tile_x = img_x & tile_mask_x;
   *tile_y = img_y & tile_mask_y;

   /* ...and the remaining bits select the aligned base. */
   return intel_region_get_aligned_offset(rgn,
                                          img_x & ~tile_mask_x,
                                          img_y & ~tile_mask_y,
                                          false);
}
static void intel_miptree_map_gtt(struct intel_context *intel, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) { unsigned int bw, bh; void *base; unsigned int image_x, image_y; int x = map->x; int y = map->y; /* For compressed formats, the stride is the number of bytes per * row of blocks. intel_miptree_get_image_offset() already does * the divide. */ _mesa_get_format_block_size(mt->format, &bw, &bh); assert(y % bh == 0); y /= bh; base = intel_miptree_map_raw(intel, mt) + mt->offset; if (base == NULL) map->ptr = NULL; else { /* Note that in the case of cube maps, the caller must have passed the * slice number referencing the face. */ intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); x += image_x; y += image_y; map->stride = mt->region->pitch; map->ptr = base + y * map->stride + x * mt->cpp; } DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, _mesa_get_format_name(mt->format), x, y, map->ptr, map->stride); }
void brw_blorp_mip_info::set(struct intel_mipmap_tree *mt, unsigned int level, unsigned int layer) { /* Layer is a physical layer, so if this is a 2D multisample array texture * using INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, then it had better * be a multiple of num_samples. */ if (mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS || mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { assert(layer % mt->num_samples == 0); } intel_miptree_check_level_layer(mt, level, layer); this->mt = mt; this->level = level; this->layer = layer; this->width = minify(mt->physical_width0, level - mt->first_level); this->height = minify(mt->physical_height0, level - mt->first_level); intel_miptree_get_image_offset(mt, level, layer, &x_offset, &y_offset); }
bool intel_miptree_copy(struct brw_context *brw, struct intel_mipmap_tree *src_mt, int src_level, int src_slice, uint32_t src_x, uint32_t src_y, struct intel_mipmap_tree *dst_mt, int dst_level, int dst_slice, uint32_t dst_x, uint32_t dst_y, uint32_t src_width, uint32_t src_height) { /* The blitter doesn't understand multisampling at all. */ if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1) return false; if (src_mt->format == MESA_FORMAT_S_UINT8) return false; /* The blitter has no idea about HiZ or fast color clears, so we need to * resolve the miptrees before we do anything. */ intel_miptree_access_raw(brw, src_mt, src_level, src_slice, false); intel_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true); uint32_t src_image_x, src_image_y; intel_miptree_get_image_offset(src_mt, src_level, src_slice, &src_image_x, &src_image_y); if (_mesa_is_format_compressed(src_mt->format)) { GLuint bw, bh; _mesa_get_format_block_size(src_mt->format, &bw, &bh); /* Compressed textures need not have dimensions that are a multiple of * the block size. Rectangles in compressed textures do need to be a * multiple of the block size. The one exception is that the right and * bottom edges may be at the right or bottom edge of the miplevel even * if it's not aligned. 
*/ assert(src_x % bw == 0); assert(src_y % bh == 0); assert(src_width % bw == 0 || src_x + src_width == minify(src_mt->surf.logical_level0_px.width, src_level)); assert(src_height % bh == 0 || src_y + src_height == minify(src_mt->surf.logical_level0_px.height, src_level)); src_x /= (int)bw; src_y /= (int)bh; src_width = DIV_ROUND_UP(src_width, (int)bw); src_height = DIV_ROUND_UP(src_height, (int)bh); } src_x += src_image_x; src_y += src_image_y; uint32_t dst_image_x, dst_image_y; intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice, &dst_image_x, &dst_image_y); if (_mesa_is_format_compressed(dst_mt->format)) { GLuint bw, bh; _mesa_get_format_block_size(dst_mt->format, &bw, &bh); assert(dst_x % bw == 0); assert(dst_y % bh == 0); dst_x /= (int)bw; dst_y /= (int)bh; } dst_x += dst_image_x; dst_y += dst_image_y; return emit_miptree_blit(brw, src_mt, src_x, src_y, dst_mt, dst_x, dst_y, src_width, src_height, false, COLOR_LOGICOP_COPY); }
/**
 * Recalculate all i830 texture state for one texture unit from scratch.
 * Perhaps not the most efficient, but this has gotten complex enough that
 * we need something which is understandable and reliable.
 *
 * \param intel  context; the current texture object of \p unit is read
 *               from it
 * \param unit   texture unit whose state words are rebuilt
 * \param ss3    bits OR'ed into the map-coordinate-set state word
 * \return GL_TRUE on success; GL_FALSE when the mipmap tree cannot be
 *         finalized, when the min/mag filter is not one of the handled
 *         enums, or when the texture target is GL_TEXTURE_3D
 */
static GLboolean
i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
   GLcontext *ctx = &intel->ctx;
   struct i830_context *i830 = i830_context(ctx);
   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
   struct gl_texture_object *tObj = tUnit->_Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct gl_texture_image *firstImage;
   GLuint *state = i830->state.Tex[unit], format, pitch;
   GLint lodbias;
   GLubyte border[4];
   GLuint dst_x, dst_y;

   /* Clear the unit's state words.  sizeof(state) would only be the size of
    * the pointer (4 or 8 bytes), so take the size of the object the pointer
    * was initialized from instead.
    * NOTE(review): this assumes i830->state.Tex[unit] is an array; if it is
    * itself a pointer this clears no more than before -- confirm against
    * the struct definition.
    */
   memset(state, 0, sizeof(i830->state.Tex[unit]));

   /*We need to refcount these. */

   if (i830->state.tex_buffer[unit] != NULL) {
       drm_intel_bo_unreference(i830->state.tex_buffer[unit]);
       i830->state.tex_buffer[unit] = NULL;
   }

   if (!intel_finalize_mipmap_tree(intel, unit))
      return GL_FALSE;

   /* Get first image here, since intelObj->firstLevel will get set in
    * the intel_finalize_mipmap_tree() call above.
    */
   firstImage = tObj->Image[0][intelObj->firstLevel];

   intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0,
                                  &dst_x, &dst_y);

   /* The state keeps its own reference on the texture buffer; it is dropped
    * at the top of the next call for this unit.
    */
   drm_intel_bo_reference(intelObj->mt->region->buffer);
   i830->state.tex_buffer[unit] = intelObj->mt->region->buffer;
   pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;

   /* XXX: This calculation is probably broken for tiled images with
    * a non-page-aligned offset.
    */
   i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch;

   format = translate_texture_format(firstImage->TexFormat,
                                     firstImage->InternalFormat);

   state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
                               (LOAD_TEXTURE_MAP0 << unit) | 4);

   state[I830_TEXREG_TM0S1] =
      (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) |
       ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format);

   if (intelObj->mt->region->tiling != I915_TILING_NONE) {
      state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE;
      if (intelObj->mt->region->tiling == I915_TILING_Y)
         state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK;
   }

   state[I830_TEXREG_TM0S2] =
      ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);

   {
      /* Cube maps enable all six faces; everything else enables none. */
      if (tObj->Target == GL_TEXTURE_CUBE_MAP)
         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) |
                                    CUBE_NEGX_ENABLE |
                                    CUBE_POSX_ENABLE |
                                    CUBE_NEGY_ENABLE |
                                    CUBE_POSY_ENABLE |
                                    CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE);
      else
         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit));
   }

   {
      GLuint minFilt, mipFilt, magFilt;

      /* Split the GL minification filter into a texel filter and a
       * mip filter.
       */
      switch (tObj->MinFilter) {
      case GL_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NONE;
         break;
      case GL_NEAREST_MIPMAP_NEAREST:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_LINEAR_MIPMAP_NEAREST:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_NEAREST;
         break;
      case GL_NEAREST_MIPMAP_LINEAR:
         minFilt = FILTER_NEAREST;
         mipFilt = MIPFILTER_LINEAR;
         break;
      case GL_LINEAR_MIPMAP_LINEAR:
         minFilt = FILTER_LINEAR;
         mipFilt = MIPFILTER_LINEAR;
         break;
      default:
         return GL_FALSE;
      }

      /* Anisotropic filtering overrides both the min and mag filter. */
      if (tObj->MaxAnisotropy > 1.0) {
         minFilt = FILTER_ANISOTROPIC;
         magFilt = FILTER_ANISOTROPIC;
      }
      else {
         switch (tObj->MagFilter) {
         case GL_NEAREST:
            magFilt = FILTER_NEAREST;
            break;
         case GL_LINEAR:
            magFilt = FILTER_LINEAR;
            break;
         default:
            return GL_FALSE;
         }
      }

      /* Combined unit + object LOD bias, scaled by 16 and clamped to
       * [-64, 63] before being packed into the bias field.
       */
      lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0);
      if (lodbias < -64)
          lodbias = -64;
      if (lodbias > 63)
          lodbias = 63;

      state[I830_TEXREG_TM0S3] = ((lodbias << TM0S3_LOD_BIAS_SHIFT) &
                                  TM0S3_LOD_BIAS_MASK);
#if 0
      /* YUV conversion:
       */
      if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR ||
          firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV)
         state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION;
#endif

      state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel -
                                    intelObj->firstLevel) *
                                   4) << TM0S3_MIN_MIP_SHIFT;

      state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) |
                                   (mipFilt << TM0S3_MIP_FILTER_SHIFT) |
                                   (magFilt << TM0S3_MAG_FILTER_SHIFT));
   }

   {
      GLenum ws = tObj->WrapS;
      GLenum wt = tObj->WrapT;

      /* 3D textures not available on i830
       */
      if (tObj->Target == GL_TEXTURE_3D)
         return GL_FALSE;

      state[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
                                MAP_UNIT(unit) |
                                ENABLE_TEXCOORD_PARAMS |
                                ss3 |
                                ENABLE_ADDR_V_CNTL |
                                TEXCOORD_ADDR_V_MODE(translate_wrap_mode(wt))
                                | ENABLE_ADDR_U_CNTL |
                                TEXCOORD_ADDR_U_MODE(translate_wrap_mode
                                                     (ws)));
   }

   /* convert border color from float to ubyte */
   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]);
   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]);
   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]);
   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]);

   state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3],
                                              border[0],
                                              border[1],
                                              border[2]);

   I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE);
   /* memcmp was already disabled, but definitely won't work as the
    * region might now change and that wouldn't be detected:
    */
   I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));

   return GL_TRUE;
}
/**
 * Work around depth/stencil intra-tile offset alignment restrictions.
 *
 * Looks up the draw framebuffer's depth and stencil renderbuffers, decides
 * from their intra-tile draw offsets whether either one has to be moved to
 * a temporary miptree (intel_renderbuffer_move_to_temp()), and finally
 * stores the resulting tile offsets, base offsets and miptrees in
 * brw->depthstencil.
 *
 * \param brw  context whose DrawBuffer is inspected and whose depthstencil
 *             state is written
 */
void
brw_workaround_depthstencil_alignment(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* The masks are computed once, from the miptrees as they are before any
    * rebasing, and reused for every offset computation below.
    */
   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (intel->gen == 4 && !intel->is_g4x) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(intel, depth_irb);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          *
          * depth_mt still holds the miptree from before the move here, so
          * the comparison is against the old miptree.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         /* Re-read the depth tile offsets from the moved renderbuffer. */
         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         /* These two block-local ints shadow the function-scope uint32_t
          * stencil_tile_x/y; the outer variables are not modified here and
          * are recomputed in the stencil check that follows this block.
          */
         int stencil_tile_x = stencil_draw_x & tile_mask_x;
         int stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase stencil
          * as well.  (if we hadn't decided to rebase stencil before, the
          * post-stencil depth test will also rebase depth to try to match it
          * up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      if (intel->gen == 4 && !intel->is_g4x) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   /* rebase_stencil is only ever set inside "if (stencil_irb)" blocks, so
    * stencil_irb is non-NULL whenever this branch runs.
    */
   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(intel, stencil_irb);
      stencil_mt = get_stencil_miptree(stencil_irb);

      /* Recompute the stencil offsets from the moved renderbuffer. */
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         /* Depth was not rebased above; rebase it now if its tile offset
          * differs from the rebased stencil's.
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(intel, depth_irb);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   /* Stencil-only framebuffer: the stencil offsets become the tile offsets. */
   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase.  At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new depthstencil state so the hardware
    * packets can just dereference that without re-calculating tile offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   brw->depthstencil.hiz_mt = NULL;
   if (depth_irb) {
      /* Pick up the (possibly rebased) depth miptree. */
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      brw->depthstencil.depth_offset =
         intel_region_get_aligned_offset(depth_mt->region,
                                         depth_irb->draw_x & ~tile_mask_x,
                                         depth_irb->draw_y & ~tile_mask_y,
                                         false);
      if (depth_mt->hiz_mt) {
         brw->depthstencil.hiz_mt = depth_mt->hiz_mt;
         /* NOTE(review): the HiZ offset is computed against depth_mt->region
          * (not the hiz miptree's region) with the y coordinate halved --
          * confirm this is intended.
          */
         brw->depthstencil.hiz_offset =
            intel_region_get_aligned_offset(depth_mt->region,
                                            depth_irb->draw_x & ~tile_mask_x,
                                            (depth_irb->draw_y & ~tile_mask_y) /
                                            2,
                                            false);
      }
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
      }
   }
}
/**
 * Work around depth/stencil intra-tile offset alignment restrictions.
 *
 * First resets brw->depthstencil to a 'nop' state that only records the
 * current depth/stencil miptrees, and returns right away when
 * brw->gen >= 7.  Otherwise it decides from the intra-tile draw offsets of
 * the depth and stencil renderbuffers whether either one has to be moved to
 * a temporary miptree (intel_renderbuffer_move_to_temp()), and stores the
 * resulting tile offsets, base offsets and miptrees in brw->depthstencil.
 *
 * \param brw         context whose DrawBuffer is inspected and whose
 *                    depthstencil state is written
 * \param clear_mask  BUFFER_BIT_DEPTH / BUFFER_BIT_STENCIL bits; they yield
 *                    the invalidate flags handed to
 *                    intel_renderbuffer_move_to_temp()
 */
void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Initialize brw->depthstencil to 'nop' workaround state.
    */
   brw->depthstencil.tile_x = 0;
   brw->depthstencil.tile_y = 0;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   if (depth_irb)
      brw->depthstencil.depth_mt = depth_mt;
   if (stencil_irb)
      brw->depthstencil.stencil_mt = get_stencil_miptree(stencil_irb);

   /* Gen7+ doesn't require the workarounds, since we always program the
    * surface state at the start of the whole surface.
    */
   if (brw->gen >= 7)
      return;

   /* Check if depth buffer is in depth/stencil format.  If so, then it's only
    * safe to invalidate it if we're also clearing stencil, and both depth_irb
    * and stencil_irb point to the same miptree.
    *
    * Note: it's not sufficient to check for the case where
    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
    * because this fails to catch depth/stencil buffers on hardware that uses
    * separate stencil.  To catch that case, we check whether
    * depth_mt->stencil_mt is non-NULL.
    */
   if (depth_irb && invalidate_depth &&
       (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
        depth_mt->stencil_mt)) {
      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
         && depth_irb->mt == stencil_irb->mt;
   }

   /* The masks are computed once, from the miptrees as they are before any
    * rebasing, and reused for every offset computation below.
    */
   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt,
                                   depth_mt ? depth_irb->mt_level : 0,
                                   depth_mt ? depth_irb->mt_layer : 0,
                                   stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (!brw->has_surface_tile_offset) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          *
          * depth_mt still holds the miptree from before the move here, so
          * the comparison is against the old miptree.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         /* Re-read the depth tile offsets from the moved renderbuffer. */
         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         /* These two block-local ints shadow the function-scope uint32_t
          * stencil_tile_x/y; the outer variables are not modified here and
          * are recomputed in the stencil check that follows this block.
          */
         int stencil_tile_x = stencil_draw_x & tile_mask_x;
         int stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase stencil
          * as well.  (if we hadn't decided to rebase stencil before, the
          * post-stencil depth test will also rebase depth to try to match it
          * up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      if (!brw->has_surface_tile_offset) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   /* rebase_stencil is only ever set inside "if (stencil_irb)" blocks, so
    * stencil_irb is non-NULL whenever this branch runs.
    */
   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(brw, stencil_irb, invalidate_stencil);
      stencil_mt = get_stencil_miptree(stencil_irb);

      /* Recompute the stencil offsets from the moved renderbuffer. */
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         /* Depth was not rebased above; rebase it now if its tile offset
          * differs from the rebased stencil's.
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   /* Stencil-only framebuffer: the stencil offsets become the tile offsets. */
   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase.  At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new depthstencil state so the hardware
    * packets can just dereference that without re-calculating tile offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   if (depth_irb) {
      /* Pick up the (possibly rebased) depth miptree. */
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      brw->depthstencil.depth_offset =
         intel_region_get_aligned_offset(depth_mt->region,
                                         depth_irb->draw_x & ~tile_mask_x,
                                         depth_irb->draw_y & ~tile_mask_y,
                                         false);
      if (intel_renderbuffer_has_hiz(depth_irb)) {
         /* NOTE(review): the HiZ offset is computed against depth_mt->region
          * with the y coordinate halved -- confirm this is intended.
          */
         brw->depthstencil.hiz_offset =
            intel_region_get_aligned_offset(depth_mt->region,
                                            depth_irb->draw_x & ~tile_mask_x,
                                            (depth_irb->draw_y & ~tile_mask_y) /
                                            2,
                                            false);
      }
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
      }
   }
}
/**
 * \brief A fast path for glReadPixels
 *
 * This fast path is taken when the source format is BGRA, RGBA,
 * A or L and when the texture memory is X- or Y-tiled.  It downloads
 * the source data by directly mapping the memory without a GTT fence.
 * This then needs to be de-tiled on the CPU before presenting the data to
 * the user in the linear fashion.
 *
 * This is a performance win over the conventional texture download path.
 * In the conventional texture download path, the texture is either mapped
 * through the GTT or copied to a linear buffer with the blitter before
 * handing off to a software path.  This allows us to avoid round-tripping
 * through the GPU (in the case where we would be blitting) and do only a
 * single copy operation.
 *
 * \param ctx      GL context; the source is ctx->ReadBuffer->_ColorReadBuffer
 * \param xoffset  X origin of the rectangle to read
 * \param yoffset  Y origin of the rectangle to read
 * \param width    width of the rectangle
 * \param height   height of the rectangle
 * \param format   client pixel format
 * \param type     client pixel type (only GL_UNSIGNED_BYTE and
 *                 GL_UNSIGNED_INT_8_8_8_8_REV are accepted)
 * \param pixels   client destination pointer (must be non-NULL and not a PBO
 *                 offset)
 * \param pack     pixel pack state; anything other than a tightly packed,
 *                 non-swapped, non-inverted layout is rejected
 *
 * \return true if the pixels were copied; false if this path does not apply
 *         or the buffer could not be mapped, in which case nothing has been
 *         written to \p pixels.
 */
static bool
intel_readpixels_tiled_memcpy(struct gl_context * ctx,
                              GLint xoffset, GLint yoffset,
                              GLsizei width, GLsizei height,
                              GLenum format, GLenum type,
                              GLvoid * pixels,
                              const struct gl_pixelstore_attrib *pack)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* This path supports reading from color buffers only */
   if (rb == NULL)
      return false;

   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   int dst_pitch;

   /* The miptree's buffer. */
   struct brw_bo *bo;

   /* Bytes per pixel and the per-span copy routine, both filled in by
    * intel_get_memcpy() below.
    */
   uint32_t cpp;
   mem_copy_fn mem_copy = NULL;

   /* This fastpath is restricted to specific renderbuffer types:
    * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
    * more types.
    */
   if (!devinfo->has_llc ||
       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
       pixels == NULL ||
       _mesa_is_bufferobj(pack->BufferObj) ||
       pack->Alignment > 4 ||
       pack->SkipPixels > 0 ||
       pack->SkipRows > 0 ||
       (pack->RowLength != 0 && pack->RowLength != width) ||
       pack->SwapBytes ||
       pack->LsbFirst ||
       pack->Invert)
      return false;

   /* Only a simple blit, no scale, bias or other mapping. */
   if (ctx->_ImageTransferState)
      return false;

   /* It is possible that the renderbuffer (or underlying texture) is
    * multisampled.  Since ReadPixels from a multisampled buffer requires a
    * multisample resolve, we can't handle this here
    */
   if (rb->NumSamples > 1)
      return false;

   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
    * function doesn't set the last channel to 1. Note this checks BaseFormat
    * rather than TexFormat in case the RGBX format is being simulated with an
    * RGBA format.
    */
   if (rb->_BaseFormat == GL_RGB)
      return false;

   if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
      return false;

   if (!irb->mt ||
       (irb->mt->surf.tiling != ISL_TILING_X &&
        irb->mt->surf.tiling != ISL_TILING_Y0)) {
      /* The algorithm is written only for X- or Y-tiled memory. */
      return false;
   }

   /* tiled_to_linear() assumes that if the object is swizzled, it is using
    * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.  This is only
    * true on gen5 and above.
    *
    * The killer on top is that some gen4 have an L-shaped swizzle mode, where
    * parts of the memory aren't swizzled at all. Userspace just can't handle
    * that.
    */
   if (devinfo->gen < 5 && brw->has_swizzling)
      return false;

   /* Since we are going to read raw data from the miptree, we need to resolve
    * any pending fast color clears before we start.
    */
   intel_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false);

   bo = irb->mt->bo;

   /* The batch still references this bo, so flush it before mapping. */
   if (brw_batch_references(&brw->batch, bo)) {
      perf_debug("Flushing before mapping a referenced bo.\n");
      intel_batchbuffer_flush(brw);
   }

   void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW);
   if (map == NULL) {
      DBG("%s: failed to map bo\n", __func__);
      return false;
   }

   /* Translate the requested rectangle from slice-relative coordinates to
    * coordinates within the whole miptree.
    */
   unsigned slice_offset_x, slice_offset_y;
   intel_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer,
                                  &slice_offset_x, &slice_offset_y);
   xoffset += slice_offset_x;
   yoffset += slice_offset_y;

   dst_pitch = _mesa_image_row_stride(pack, width, format, type);

   /* For a window-system renderbuffer, the buffer is actually flipped
    * vertically, so we need to handle that.  Since the detiling function
    * can only really work in the forwards direction, we have to be a
    * little creative.  First, we compute the Y-offset of the first row of
    * the renderbuffer (in renderbuffer coordinates).  We then match that
    * with the last row of the client's data.  Finally, we give
    * tiled_to_linear a negative pitch so that it walks through the
    * client's data backwards as it walks through the renderbuffer forwards.
    */
   if (rb->Name == 0) {
      yoffset = rb->Height - yoffset - height;
      pixels += (ptrdiff_t) (height - 1) * dst_pitch;
      dst_pitch = -dst_pitch;
   }

   /* We postponed printing this message until having committed to executing
    * the function.
    */
   DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
       "mesa_format=0x%x tiling=%d "
       "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
       __func__, xoffset, yoffset, width, height,
       format, type, rb->Format, irb->mt->surf.tiling,
       pack->Alignment, pack->RowLength, pack->SkipPixels,
       pack->SkipRows);

   /* The destination pointer is rebased by (-yoffset, -xoffset); this
    * presumably lets tiled_to_linear() index source and destination with the
    * same absolute x/y ranges -- NOTE(review): confirm against
    * tiled_to_linear()'s contract.
    */
   tiled_to_linear(
      xoffset * cpp, (xoffset + width) * cpp,
      yoffset, yoffset + height,
      pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp,
      map + irb->mt->offset,
      dst_pitch, irb->mt->surf.row_pitch,
      brw->has_swizzling,
      irb->mt->surf.tiling,
      mem_copy
   );

   brw_bo_unmap(bo);
   return true;
}
bool intel_copy_texsubimage(struct intel_context *intel, struct intel_texture_image *intelImage, GLint dstx, GLint dsty, struct intel_renderbuffer *irb, GLint x, GLint y, GLsizei width, GLsizei height) { struct gl_context *ctx = &intel->ctx; struct intel_region *region; const GLenum internalFormat = intelImage->base.Base.InternalFormat; bool copy_supported = false; bool copy_supported_with_alpha_override = false; intel_prepare_render(intel); if (!intelImage->mt || !irb || !irb->mt) { if (unlikely(INTEL_DEBUG & DEBUG_PERF)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", __FUNCTION__, intelImage->mt, irb, internalFormat); return false; } else { region = irb->mt->region; assert(region); } /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics * Data Size Limitations): * * The BLT engine is capable of transferring very large quantities of * graphics data. Any graphics data read from and written to the * destination is permitted to represent a number of pixels that * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line * at the destination. The maximum number of pixels that may be * represented per scan line’s worth of graphics data depends on the * color depth. * * Furthermore, intelEmitCopyBlit (which is called below) uses a signed * 16-bit integer to represent buffer pitch, so it can only handle buffer * pitches < 32k. * * As a result of these two limitations, we can only use the blitter to do * this copy when the region's pitch is less than 32k. 
*/ if (region->pitch >= 32768) return false; if (intelImage->base.Base.TexObject->Target == GL_TEXTURE_1D_ARRAY || intelImage->base.Base.TexObject->Target == GL_TEXTURE_2D_ARRAY) { perf_debug("no support for array textures\n"); } copy_supported = intelImage->base.Base.TexFormat == intel_rb_format(irb); /* Converting ARGB8888 to XRGB8888 is trivial: ignore the alpha bits */ if (intel_rb_format(irb) == MESA_FORMAT_ARGB8888 && intelImage->base.Base.TexFormat == MESA_FORMAT_XRGB8888) { copy_supported = true; } /* Converting XRGB8888 to ARGB8888 requires setting the alpha bits to 1.0 */ if (intel_rb_format(irb) == MESA_FORMAT_XRGB8888 && intelImage->base.Base.TexFormat == MESA_FORMAT_ARGB8888) { copy_supported_with_alpha_override = true; } if (!copy_supported && !copy_supported_with_alpha_override) { if (unlikely(INTEL_DEBUG & DEBUG_PERF)) fprintf(stderr, "%s mismatched formats %s, %s\n", __FUNCTION__, _mesa_get_format_name(intelImage->base.Base.TexFormat), _mesa_get_format_name(intel_rb_format(irb))); return false; } { GLuint image_x, image_y; GLshort src_pitch; /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Base.Level, intelImage->base.Base.Face, &image_x, &image_y); /* The blitter can't handle Y-tiled buffers. 
*/ if (intelImage->mt->region->tiling == I915_TILING_Y) { return false; } if (_mesa_is_winsys_fbo(ctx->ReadBuffer)) { /* Flip vertical orientation for system framebuffers */ y = ctx->ReadBuffer->Height - (y + height); src_pitch = -region->pitch; } else { /* reading from a FBO, y is already oriented the way we like */ src_pitch = region->pitch; } /* blit from src buffer to texture */ if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_pitch, region->bo, 0, region->tiling, intelImage->mt->region->pitch, intelImage->mt->region->bo, 0, intelImage->mt->region->tiling, irb->draw_x + x, irb->draw_y + y, image_x + dstx, image_y + dsty, width, height, GL_COPY)) { return false; } } if (copy_supported_with_alpha_override) intel_set_teximage_alpha_to_one(ctx, intelImage); return true; }
/** * Implements a rectangular block transfer (blit) of pixels between two * miptrees. * * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous, * but limited, pitches and sizes allowed. * * The src/dst coordinates are relative to the given level/slice of the * miptree. * * If @src_flip or @dst_flip is set, then the rectangle within that miptree * will be inverted (including scanline order) when copying. This is common * in GL when copying between window system and user-created * renderbuffers/textures. */ bool intel_miptree_blit(struct brw_context *brw, struct intel_mipmap_tree *src_mt, int src_level, int src_slice, uint32_t src_x, uint32_t src_y, bool src_flip, struct intel_mipmap_tree *dst_mt, int dst_level, int dst_slice, uint32_t dst_x, uint32_t dst_y, bool dst_flip, uint32_t width, uint32_t height, GLenum logicop) { /* The blitter doesn't understand multisampling at all. */ if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) return false; /* No sRGB decode or encode is done by the hardware blitter, which is * consistent with what we want in the callers (glCopyTexSubImage(), * glBlitFramebuffer(), texture validation, etc.). */ mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format); mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format); /* The blitter doesn't support doing any format conversions. We do also * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A * channel to 1.0 at the end. 
*/ if (src_format != dst_format && ((src_format != MESA_FORMAT_B8G8R8A8_UNORM && src_format != MESA_FORMAT_B8G8R8X8_UNORM) || (dst_format != MESA_FORMAT_B8G8R8A8_UNORM && dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) { perf_debug("%s: Can't use hardware blitter from %s to %s, " "falling back.\n", __FUNCTION__, _mesa_get_format_name(src_format), _mesa_get_format_name(dst_format)); return false; } /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics * Data Size Limitations): * * The BLT engine is capable of transferring very large quantities of * graphics data. Any graphics data read from and written to the * destination is permitted to represent a number of pixels that * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line * at the destination. The maximum number of pixels that may be * represented per scan line’s worth of graphics data depends on the * color depth. * * Furthermore, intelEmitCopyBlit (which is called below) uses a signed * 16-bit integer to represent buffer pitch, so it can only handle buffer * pitches < 32k. * * As a result of these two limitations, we can only use the blitter to do * this copy when the miptree's pitch is less than 32k. */ if (src_mt->pitch >= 32768 || dst_mt->pitch >= 32768) { perf_debug("Falling back due to >=32k pitch\n"); return false; } /* The blitter has no idea about HiZ or fast color clears, so we need to * resolve the miptrees before we do anything. 
*/ intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice); intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice); intel_miptree_resolve_color(brw, src_mt); intel_miptree_resolve_color(brw, dst_mt); if (src_flip) src_y = minify(src_mt->physical_height0, src_level - src_mt->first_level) - src_y - height; if (dst_flip) dst_y = minify(dst_mt->physical_height0, dst_level - dst_mt->first_level) - dst_y - height; int src_pitch = src_mt->pitch; if (src_flip != dst_flip) src_pitch = -src_pitch; uint32_t src_image_x, src_image_y; intel_miptree_get_image_offset(src_mt, src_level, src_slice, &src_image_x, &src_image_y); src_x += src_image_x; src_y += src_image_y; /* The blitter interprets the 16-bit src x/y as a signed 16-bit value, * where negative values are invalid. The values we're working with are * unsigned, so make sure we don't overflow. */ if (src_x >= 32768 || src_y >= 32768) { perf_debug("Falling back due to >=32k src offset (%d, %d)\n", src_x, src_y); return false; } uint32_t dst_image_x, dst_image_y; intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice, &dst_image_x, &dst_image_y); dst_x += dst_image_x; dst_y += dst_image_y; /* The blitter interprets the 16-bit destination x/y as a signed 16-bit * value. The values we're working with are unsigned, so make sure we * don't overflow. */ if (dst_x >= 32768 || dst_y >= 32768) { perf_debug("Falling back due to >=32k dst offset (%d, %d)\n", dst_x, dst_y); return false; } if (!intelEmitCopyBlit(brw, src_mt->cpp, src_pitch, src_mt->bo, src_mt->offset, src_mt->tiling, dst_mt->pitch, dst_mt->bo, dst_mt->offset, dst_mt->tiling, src_x, src_y, dst_x, dst_y, width, height, logicop)) { return false; } if (src_mt->format == MESA_FORMAT_B8G8R8X8_UNORM && dst_mt->format == MESA_FORMAT_B8G8R8A8_UNORM) { intel_miptree_set_alpha_to_one(brw, dst_mt, dst_x, dst_y, width, height); } return true; }
bool intel_copy_texsubimage(struct intel_context *intel, struct intel_texture_image *intelImage, GLint dstx, GLint dsty, struct intel_renderbuffer *irb, GLint x, GLint y, GLsizei width, GLsizei height) { struct gl_context *ctx = &intel->ctx; struct intel_region *region; const GLenum internalFormat = intelImage->base.Base.InternalFormat; bool copy_supported = false; bool copy_supported_with_alpha_override = false; intel_prepare_render(intel); if (!intelImage->mt || !irb || !irb->mt) { if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", __FUNCTION__, intelImage->mt, irb, internalFormat); return false; } else { region = irb->mt->region; assert(region); } copy_supported = intelImage->base.Base.TexFormat == intel_rb_format(irb); /* Converting ARGB8888 to XRGB8888 is trivial: ignore the alpha bits */ if (intel_rb_format(irb) == MESA_FORMAT_ARGB8888 && intelImage->base.Base.TexFormat == MESA_FORMAT_XRGB8888) { copy_supported = true; } /* Converting XRGB8888 to ARGB8888 requires setting the alpha bits to 1.0 */ if (intel_rb_format(irb) == MESA_FORMAT_XRGB8888 && intelImage->base.Base.TexFormat == MESA_FORMAT_ARGB8888) { copy_supported_with_alpha_override = true; } if (!copy_supported && !copy_supported_with_alpha_override) { if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s mismatched formats %s, %s\n", __FUNCTION__, _mesa_get_format_name(intelImage->base.Base.TexFormat), _mesa_get_format_name(intel_rb_format(irb))); return false; } { GLuint image_x, image_y; GLshort src_pitch; /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Base.Level, intelImage->base.Base.Face, 0, &image_x, &image_y); /* The blitter can't handle Y-tiled buffers. 
*/ if (intelImage->mt->region->tiling == I915_TILING_Y) { return false; } if (ctx->ReadBuffer->Name == 0) { /* Flip vertical orientation for system framebuffers */ y = ctx->ReadBuffer->Height - (y + height); src_pitch = -region->pitch; } else { /* reading from a FBO, y is already oriented the way we like */ src_pitch = region->pitch; } /* blit from src buffer to texture */ if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_pitch, region->bo, 0, region->tiling, intelImage->mt->region->pitch, intelImage->mt->region->bo, 0, intelImage->mt->region->tiling, irb->draw_x + x, irb->draw_y + y, image_x + dstx, image_y + dsty, width, height, GL_COPY)) { return false; } } if (copy_supported_with_alpha_override) intel_set_teximage_alpha_to_one(ctx, intelImage); return true; }
/**
 * Attempt to upload a texture image straight from a pixel buffer object by
 * blitting from the PBO's buffer into the texture's miptree.
 *
 * Returns true when the blit was emitted.  Returns false when the fast path
 * does not apply: no PBO bound, pixel transfer ops or skip offsets active,
 * format/type mismatch, no miptree, array texture target, or blit failure.
 *
 * XXX: Do this for TexSubImage also.
 */
static bool
try_pbo_upload(struct gl_context *ctx,
               struct gl_texture_image *image,
               const struct gl_pixelstore_attrib *unpack,
               GLenum format, GLenum type, const void *pixels)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
   drm_intel_bo *blit_dst, *blit_src;
   GLuint pbo_offset, pbo_stride;
   GLuint slice_x, slice_y;

   if (!_mesa_is_bufferobj(unpack->BufferObj))
      return false;

   DBG("trying pbo upload\n");

   if (intel->ctx._ImageTransferState ||
       unpack->SkipPixels || unpack->SkipRows) {
      DBG("%s: image transfer\n", __FUNCTION__);
      return false;
   }

   if (!_mesa_format_matches_format_and_type(image->TexFormat,
                                             format, type, false)) {
      DBG("%s: format mismatch (upload to %s with format 0x%x, type 0x%x)\n",
          __FUNCTION__, _mesa_get_format_name(image->TexFormat),
          format, type);
      return false;
   }

   ctx->Driver.AllocTextureImageBuffer(ctx, image);

   if (!intelImage->mt) {
      DBG("%s: no miptree\n", __FUNCTION__);
      return false;
   }

   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
   case GL_TEXTURE_2D_ARRAY:
      DBG("%s: no support for array textures\n", __FUNCTION__);
      return false;
   default:
      break;
   }

   blit_dst = intelImage->mt->region->bo;
   blit_src = intel_bufferobj_source(intel, pbo, 64, &pbo_offset);

   /* "pixels" is an offset into the PBO here.
    * note: potential 64-bit ptr to 32-bit int cast
    */
   pbo_offset += (GLuint) (unsigned long) pixels;

   /* Source row length in pixels, then scaled to bytes. */
   pbo_stride = (unpack->RowLength > 0) ? unpack->RowLength : image->Width;
   pbo_stride *= intelImage->mt->region->cpp;

   intel_miptree_get_image_offset(intelImage->mt,
                                  intelImage->base.Base.Level,
                                  intelImage->base.Base.Face,
                                  &slice_x, &slice_y);

   if (intelEmitCopyBlit(intel,
                         intelImage->mt->cpp,
                         pbo_stride, blit_src, pbo_offset, false,
                         intelImage->mt->region->pitch, blit_dst, 0,
                         intelImage->mt->region->tiling,
                         0, 0, slice_x, slice_y,
                         image->Width, image->Height,
                         GL_COPY)) {
      DBG("%s: success\n", __FUNCTION__);
      return true;
   }

   DBG("%s: blit failed\n", __FUNCTION__);
   return false;
}
static bool copy_image_with_blitter(struct brw_context *brw, struct intel_mipmap_tree *src_mt, int src_level, int src_x, int src_y, int src_z, struct intel_mipmap_tree *dst_mt, int dst_level, int dst_x, int dst_y, int dst_z, int src_width, int src_height) { GLuint bw, bh; uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y; /* The blitter doesn't understand multisampling at all. */ if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) return false; if (src_mt->format == MESA_FORMAT_S_UINT8) return false; /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics * Data Size Limitations): * * The BLT engine is capable of transferring very large quantities of * graphics data. Any graphics data read from and written to the * destination is permitted to represent a number of pixels that * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line * at the destination. The maximum number of pixels that may be * represented per scan line’s worth of graphics data depends on the * color depth. * * Furthermore, intelEmitCopyBlit (which is called below) uses a signed * 16-bit integer to represent buffer pitch, so it can only handle buffer * pitches < 32k. * * As a result of these two limitations, we can only use the blitter to do * this copy when the miptree's pitch is less than 32k. 
*/ if (src_mt->pitch >= 32768 || dst_mt->pitch >= 32768) { perf_debug("Falling back due to >=32k pitch\n"); return false; } intel_miptree_get_image_offset(src_mt, src_level, src_z, &src_image_x, &src_image_y); if (_mesa_is_format_compressed(src_mt->format)) { _mesa_get_format_block_size(src_mt->format, &bw, &bh); assert(src_x % bw == 0); assert(src_y % bh == 0); assert(src_width % bw == 0); assert(src_height % bh == 0); src_x /= (int)bw; src_y /= (int)bh; src_width /= (int)bw; src_height /= (int)bh; } src_x += src_image_x; src_y += src_image_y; intel_miptree_get_image_offset(dst_mt, dst_level, dst_z, &dst_image_x, &dst_image_y); if (_mesa_is_format_compressed(dst_mt->format)) { _mesa_get_format_block_size(dst_mt->format, &bw, &bh); assert(dst_x % bw == 0); assert(dst_y % bh == 0); dst_x /= (int)bw; dst_y /= (int)bh; } dst_x += dst_image_x; dst_y += dst_image_y; return intelEmitCopyBlit(brw, src_mt->cpp, src_mt->pitch, src_mt->bo, src_mt->offset, src_mt->tiling, src_mt->tr_mode, dst_mt->pitch, dst_mt->bo, dst_mt->offset, dst_mt->tiling, dst_mt->tr_mode, src_x, src_y, dst_x, dst_y, src_width, src_height, GL_COPY); }
/**
 * Blit a rectangle of pixels from one miptree slice to another.
 *
 * The blitter handles 1, 2, or 4-byte-per-pixel data, within limited (if
 * generous) pitches and sizes.
 *
 * src/dst coordinates are relative to the given level/slice of each
 * miptree.
 *
 * When @src_flip or @dst_flip is set, the rectangle in that miptree is
 * inverted (scanline order included) during the copy, as is common in GL
 * when copying between window-system and user-created
 * renderbuffers/textures.
 *
 * Returns true if the blit was emitted, false if the caller must fall back.
 */
bool
intel_miptree_blit(struct brw_context *brw,
                   struct intel_mipmap_tree *src_mt,
                   int src_level, int src_slice,
                   uint32_t src_x, uint32_t src_y, bool src_flip,
                   struct intel_mipmap_tree *dst_mt,
                   int dst_level, int dst_slice,
                   uint32_t dst_x, uint32_t dst_y, bool dst_flip,
                   uint32_t width, uint32_t height,
                   enum gl_logicop_mode logicop)
{
   /* The blitter doesn't understand multisampling at all. */
   if (src_mt->surf.samples > 1)
      return false;
   if (dst_mt->surf.samples > 1)
      return false;

   /* The hardware blitter performs no sRGB decode or encode, which matches
    * what many callers want (glCopyTexSubImage(), texture validation, etc.),
    * so compare the linear equivalents of both formats.
    */
   const mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
   const mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);

   /* No format conversion is available.  ARGB8888 -> XRGB8888 is allowed
    * (whatever lands in X is irrelevant), as is XRGB8888 -> ARGB8888 by
    * writing A = 1.0 afterwards.  ARGB2101010 -> XRGB2101010 is likewise
    * trivial, but XRGB2101010 -> ARGB2101010 isn't supported yet.
    */
   if (!intel_miptree_blit_compatible_formats(src_format, dst_format)) {
      perf_debug("%s: Can't use hardware blitter from %s to %s, "
                 "falling back.\n", __func__,
                 _mesa_get_format_name(src_format),
                 _mesa_get_format_name(dst_format));
      return false;
   }

   /* HiZ and fast color clears are invisible to the blitter, so resolve
    * both miptrees first.
    */
   intel_miptree_access_raw(brw, src_mt, src_level, src_slice, false);
   intel_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true);

   /* Turn flipped Y coordinates into top-down rows within the level. */
   if (src_flip) {
      const unsigned src_level_h =
         minify(src_mt->surf.phys_level0_sa.height,
                src_level - src_mt->first_level);
      src_y = src_level_h - src_y - height;
   }

   if (dst_flip) {
      const unsigned dst_level_h =
         minify(dst_mt->surf.phys_level0_sa.height,
                dst_level - dst_mt->first_level);
      dst_y = dst_level_h - dst_y - height;
   }

   /* Make the coordinates relative to the whole miptree. */
   uint32_t src_slice_x, src_slice_y;
   uint32_t dst_slice_x, dst_slice_y;
   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
                                  &src_slice_x, &src_slice_y);
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
                                  &dst_slice_x, &dst_slice_y);
   src_x += src_slice_x;
   src_y += src_slice_y;
   dst_x += dst_slice_x;
   dst_y += dst_slice_y;

   /* Only reverse the scanline order when exactly one side is flipped. */
   const bool reverse = (src_flip != dst_flip);
   const bool emitted = emit_miptree_blit(brw, src_mt, src_x, src_y,
                                          dst_mt, dst_x, dst_y,
                                          width, height,
                                          reverse, logicop);
   if (!emitted)
      return false;

   /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */
   if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0) {
      if (_mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) {
         intel_miptree_set_alpha_to_one(brw, dst_mt,
                                        dst_x, dst_y,
                                        width, height);
      }
   }

   return true;
}
/**
 * Prepare for rendering into texture memory.  Called by
 * glFramebufferTexture[123]DEXT() (and other places), possibly many times
 * with different texture levels, cube faces, etc. before
 * intel_finish_render_texture() is ever called.
 *
 * Falls back to _mesa_render_texture() when the image has no miptree, when
 * no wrapper renderbuffer can be created, or when the wrapper cannot be
 * updated for the image.
 */
static void
intel_render_texture(GLcontext * ctx,
                     struct gl_framebuffer *fb,
                     struct gl_renderbuffer_attachment *att)
{
   struct gl_texture_image *newImage
      = att->Texture->Image[att->CubeMapFace][att->TextureLevel];
   struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
   struct intel_texture_image *intel_image;
   struct intel_mipmap_tree *tree;
   GLuint slice_x, slice_y;

   (void) fb;

   ASSERT(newImage);

   intel_image = intel_texture_image(newImage);

   if (!intel_image->mt) {
      /* Fallback on drawing to a texture that doesn't have a miptree
       * (has a border, width/height 0, etc.)
       */
      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
      _mesa_render_texture(ctx, fb, att);
      return;
   }

   if (!irb) {
      irb = intel_wrap_texture(ctx, newImage);
      if (!irb) {
         /* fallback to software rendering */
         _mesa_render_texture(ctx, fb, att);
         return;
      }

      /* bind the wrapper to the attachment point */
      _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base);
   }

   if (!intel_update_wrapper(ctx, irb, newImage)) {
      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
      _mesa_render_texture(ctx, fb, att);
      return;
   }

   DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n",
       _glthread_GetID(),
       att->Texture->Name, newImage->Width, newImage->Height,
       irb->Base.RefCount);

   tree = intel_image->mt;

   /* point the renderbuffer's region to the texture image region */
   if (irb->region != tree->region) {
      if (irb->region)
         intel_region_release(&irb->region);
      intel_region_reference(&irb->region, tree->region);
   }

   /* compute offset of the particular 2D image within the texture region */
   intel_miptree_get_image_offset(tree,
                                  att->TextureLevel,
                                  att->CubeMapFace,
                                  att->Zoffset,
                                  &slice_x, &slice_y);

   tree->region->draw_offset = (slice_y * tree->pitch + slice_x) * tree->cpp;
   tree->region->draw_x = slice_x;
   tree->region->draw_y = slice_y;
   intel_image->used_as_render_target = GL_TRUE;

   /* update drawing region, etc */
   intel_draw_buffer(ctx, fb);
}
/* Recalculate all state from scratch. Perhaps not the most * efficient, but this has gotten complex enough that we need * something which is understandable and reliable. */ static bool i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { struct gl_context *ctx = &intel->ctx; struct i830_context *i830 = i830_context(ctx); struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = tUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); GLuint *state = i830->state.Tex[unit], format, pitch; GLint lodbias; GLubyte border[4]; GLuint dst_x, dst_y; memset(state, 0, sizeof(*state)); /*We need to refcount these. */ if (i830->state.tex_buffer[unit] != NULL) { drm_intel_bo_unreference(i830->state.tex_buffer[unit]); i830->state.tex_buffer[unit] = NULL; } if (!intel_finalize_mipmap_tree(intel, unit)) return false; /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ firstImage = tObj->Image[0][tObj->BaseLevel]; intel_miptree_get_image_offset(intelObj->mt, tObj->BaseLevel, 0, &dst_x, &dst_y); drm_intel_bo_reference(intelObj->mt->region->bo); i830->state.tex_buffer[unit] = intelObj->mt->region->bo; pitch = intelObj->mt->region->pitch; /* XXX: This calculation is probably broken for tiled images with * a non-page-aligned offset. 
*/ i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch; format = translate_texture_format(firstImage->TexFormat); state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | (LOAD_TEXTURE_MAP0 << unit) | 4); state[I830_TEXREG_TM0S1] = (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) | ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format); if (intelObj->mt->region->tiling != I915_TILING_NONE) { state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE; if (intelObj->mt->region->tiling == I915_TILING_Y) state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK; } state[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); { if (tObj->Target == GL_TEXTURE_CUBE_MAP) state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) | CUBE_NEGX_ENABLE | CUBE_POSX_ENABLE | CUBE_NEGY_ENABLE | CUBE_POSY_ENABLE | CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE); else state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit)); } { GLuint minFilt, mipFilt, magFilt; float maxlod; uint32_t minlod_fixed, maxlod_fixed; switch (sampler->MinFilter) { case GL_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NONE; break; case GL_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NONE; break; case GL_NEAREST_MIPMAP_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NEAREST; break; case GL_LINEAR_MIPMAP_NEAREST: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NEAREST; break; case GL_NEAREST_MIPMAP_LINEAR: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_LINEAR; break; case GL_LINEAR_MIPMAP_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_LINEAR; break; default: return false; } if (sampler->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; } else { switch (sampler->MagFilter) { case GL_NEAREST: magFilt = FILTER_NEAREST; break; case GL_LINEAR: magFilt = FILTER_LINEAR; break; default: return false; } } lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0); if (lodbias < -64) lodbias = -64; if (lodbias > 63) 
lodbias = 63; state[I830_TEXREG_TM0S3] = ((lodbias << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK); #if 0 /* YUV conversion: */ if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR || firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV) state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION; #endif /* We get one field with fraction bits for the maximum * addressable (smallest resolution) LOD. Use it to cover both * MAX_LEVEL and MAX_LOD. */ minlod_fixed = U_FIXED(CLAMP(sampler->MinLod, 0.0, 11), 4); maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM || intel->intelScreen->deviceID == PCI_CHIP_I865_G) { maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2); maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 3) >> 2); state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT; state[I830_TEXREG_TM0S2] |= TM0S2_LOD_PRECLAMP; } else {
/**
 * Implements a rectangular block transfer (blit) of pixels between two
 * miptrees.
 *
 * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
 * but limited, pitches and sizes allowed.
 *
 * The src/dst coordinates are relative to the given level/slice of the
 * miptree.
 *
 * If @src_flip or @dst_flip is set, then the rectangle within that miptree
 * will be inverted (including scanline order) when copying.  This is common
 * in GL when copying between window system and user-created
 * renderbuffers/textures.
 *
 * \return true if the copy was handed to the blitter; false if the formats
 *         are not blit-compatible, if either region's pitch is too large
 *         for the blitter, or if intelEmitCopyBlit() reported failure.  On
 *         false the caller is expected to fall back to another copy path.
 */
bool
intel_miptree_blit(struct intel_context *intel,
                   struct intel_mipmap_tree *src_mt,
                   int src_level, int src_slice,
                   uint32_t src_x, uint32_t src_y, bool src_flip,
                   struct intel_mipmap_tree *dst_mt,
                   int dst_level, int dst_slice,
                   uint32_t dst_x, uint32_t dst_y, bool dst_flip,
                   uint32_t width, uint32_t height,
                   GLenum logicop)
{
   /* No sRGB decode or encode is done by the hardware blitter, which is
    * consistent with what we want in the callers (glCopyTexSubImage(),
    * glBlitFramebuffer(), texture validation, etc.).
    */
   gl_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
   gl_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);

   /* The blitter doesn't support doing any format conversions.  We do also
    * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
    * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
    * channel to 1.0 at the end.
    */
   if (src_format != dst_format &&
       ((src_format != MESA_FORMAT_ARGB8888 &&
         src_format != MESA_FORMAT_XRGB8888) ||
        (dst_format != MESA_FORMAT_ARGB8888 &&
         dst_format != MESA_FORMAT_XRGB8888))) {
      perf_debug("%s: Can't use hardware blitter from %s to %s, "
                 "falling back.\n", __FUNCTION__,
                 _mesa_get_format_name(src_format),
                 _mesa_get_format_name(dst_format));
      return false;
   }

   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
    * Data Size Limitations):
    *
    *    The BLT engine is capable of transferring very large quantities of
    *    graphics data. Any graphics data read from and written to the
    *    destination is permitted to represent a number of pixels that
    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
    *    at the destination. The maximum number of pixels that may be
    *    represented per scan line's worth of graphics data depends on the
    *    color depth.
    *
    * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
    * 16-bit integer to represent buffer pitch, so it can only handle buffer
    * pitches < 32k.
    *
    * As a result of these two limitations, we can only use the blitter to do
    * this copy when the region's pitch is less than 32k.  A pitch of exactly
    * 32768 does not fit in a signed 16-bit value either, so it has to be
    * rejected as well (hence >=, not >).
    */
   if (src_mt->region->pitch >= 32768 ||
       dst_mt->region->pitch >= 32768) {
      perf_debug("Falling back due to >=32k pitch\n");
      return false;
   }

   /* Flipped rectangles are addressed from the opposite edge of the level. */
   if (src_flip)
      src_y = src_mt->level[src_level].height - src_y - height;

   if (dst_flip)
      dst_y = dst_mt->level[dst_level].height - dst_y - height;

   /* When exactly one side is flipped, the source is passed with a negated
    * pitch to intelEmitCopyBlit().
    */
   int src_pitch = src_mt->region->pitch;
   if (src_flip != dst_flip)
      src_pitch = -src_pitch;

   /* Translate level/slice-relative coordinates into region coordinates. */
   uint32_t src_image_x, src_image_y;
   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
                                  &src_image_x, &src_image_y);
   src_x += src_image_x;
   src_y += src_image_y;

   uint32_t dst_image_x, dst_image_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
                                  &dst_image_x, &dst_image_y);
   dst_x += dst_image_x;
   dst_y += dst_image_y;

   if (!intelEmitCopyBlit(intel,
                          src_mt->cpp,
                          src_pitch,
                          src_mt->region->bo, src_mt->offset,
                          src_mt->region->tiling,
                          dst_mt->region->pitch,
                          dst_mt->region->bo, dst_mt->offset,
                          dst_mt->region->tiling,
                          src_x, src_y,
                          dst_x, dst_y,
                          width, height,
                          logicop)) {
      return false;
   }

   /* XRGB8888 -> ARGB8888: the copied X bytes are not meaningful alpha, so
    * overwrite the destination alpha with 1.0.  dst_x/dst_y already include
    * the image offset at this point.
    *
    * NOTE(review): this compares the raw miptree formats rather than the
    * sRGB-linearized src_format/dst_format checked above -- confirm whether
    * an sRGB ARGB destination should get its alpha forced to one as well.
    */
   if (src_mt->format == MESA_FORMAT_XRGB8888 &&
       dst_mt->format == MESA_FORMAT_ARGB8888) {
      intel_miptree_set_alpha_to_one(intel, dst_mt,
                                     dst_x, dst_y,
                                     width, height);
   }

   return true;
}
/** * Used to initialize the alpha value of an ARGB8888 teximage after * loading it from an XRGB8888 source. * * This is very common with glCopyTexImage2D(). */ void intel_set_teximage_alpha_to_one(struct gl_context *ctx, struct intel_texture_image *intel_image) { struct intel_context *intel = intel_context(ctx); unsigned int image_x, image_y; uint32_t x1, y1, x2, y2; uint32_t BR13, CMD; int pitch, cpp; drm_intel_bo *aper_array[2]; struct intel_region *region = intel_image->mt->region; BATCH_LOCALS; assert(intel_image->base.TexFormat == MESA_FORMAT_ARGB8888); /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intel_image->mt, intel_image->level, intel_image->face, 0, &image_x, &image_y); x1 = image_x; y1 = image_y; x2 = image_x + intel_image->base.Width; y2 = image_y + intel_image->base.Height; pitch = region->pitch; cpp = region->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", __FUNCTION__, intel_image->mt->region->buffer, (pitch * cpp), x1, y1, x2 - x1, y2 - y1); BR13 = br13_for_cpp(cpp) | 0xf0 << 16; CMD = XY_COLOR_BLT_CMD; CMD |= XY_BLT_WRITE_ALPHA; assert(region->tiling != I915_TILING_Y); #ifndef I915 if (region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } #endif BR13 |= (pitch * cpp); /* do space check before going any further */ aper_array[0] = intel->batch.bo; aper_array[1] = region->buffer; if (drm_intel_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)) != 0) { intel_batchbuffer_flush(intel); } BEGIN_BATCH_BLT(6); OUT_BATCH(CMD); OUT_BATCH(BR13); OUT_BATCH((y1 << 16) | x1); OUT_BATCH((y2 << 16) | x2); OUT_RELOC_FENCED(region->buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel); }