/**
 * Interface for getting memory for uploading streamed data to the GPU
 *
 * In most cases, streamed data (for GPU state structures, for example) is
 * uploaded through brw_state_batch(), since that interface allows relocations
 * from the streamed space returned to other BOs.  However, that interface has
 * the restriction that the amount of space allocated has to be "small" (see
 * estimated_max_prim_size in brw_draw.c).
 *
 * This interface, on the other hand, is able to handle arbitrary sized
 * allocation requests, though it will batch small allocations into the same
 * BO for efficiency and reduced memory footprint.
 *
 * \note The returned pointer is valid only until intel_upload_finish(), which
 * will happen at batch flush or the next
 * intel_upload_space()/intel_upload_data().
 *
 * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
 * entry, and will have a reference to the new BO containing the state on
 * return.
 *
 * \param out_offset Offset within the buffer object that the data will land.
 */
void *
intel_upload_space(struct brw_context *brw,
                   uint32_t size,
                   uint32_t alignment,
                   drm_intel_bo **out_bo,
                   uint32_t *out_offset)
{
   uint32_t offset;

   offset = ALIGN_NPOT(brw->upload.next_offset, alignment);
   if (brw->upload.bo && offset + size > brw->upload.bo->size) {
      intel_upload_finish(brw);
      offset = 0;
   }

   if (!brw->upload.bo) {
      brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "streamed data",
                                          MAX2(INTEL_UPLOAD_SIZE, size), 4096);
      if (brw->has_llc)
         drm_intel_bo_map(brw->upload.bo, true);
      else
         drm_intel_gem_bo_map_gtt(brw->upload.bo);
   }

   brw->upload.next_offset = offset + size;

   *out_offset = offset;
   if (*out_bo != brw->upload.bo) {
      drm_intel_bo_unreference(*out_bo);
      *out_bo = brw->upload.bo;
      drm_intel_bo_reference(brw->upload.bo);
   }

   return brw->upload.bo->virtual + offset;
}
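/* A minimal usage sketch (not part of the driver): how a caller might stream
 * a small payload through intel_upload_space().  Everything here except
 * intel_upload_space() itself -- the function name, the 64-byte alignment,
 * and the payload -- is hypothetical.
 */
static void
example_stream_payload(struct brw_context *brw,
                       const void *payload, uint32_t bytes)
{
   drm_intel_bo *bo = NULL;   /* must be NULL or a valid BO on entry */
   uint32_t offset;

   /* Returns a CPU-writable pointer that, per the note above, is only
    * valid until the next upload call or batch flush.
    */
   void *dst = intel_upload_space(brw, bytes, 64, &bo, &offset);
   memcpy(dst, payload, bytes);

   /* The caller now holds a reference on bo and can emit relocations
    * against (bo, offset); drop the reference once it is no longer needed.
    */
   drm_intel_bo_unreference(bo);
}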
static __DRIimage *
intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
{
   __DRIimage *image;

   image = calloc(1, sizeof *image);
   if (image == NULL)
      return NULL;

   drm_intel_bo_reference(orig_image->bo);
   image->bo = orig_image->bo;
   image->internal_format = orig_image->internal_format;
   image->planar_format = orig_image->planar_format;
   image->dri_format = orig_image->dri_format;
   image->format = orig_image->format;
   image->offset = orig_image->offset;
   image->width = orig_image->width;
   image->height = orig_image->height;
   image->pitch = orig_image->pitch;
   image->tile_x = orig_image->tile_x;
   image->tile_y = orig_image->tile_y;
   image->has_depthstencil = orig_image->has_depthstencil;
   image->data = loaderPrivate;
   memcpy(image->strides, orig_image->strides, sizeof(image->strides));
   memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets));

   return image;
}
/**
 * Allocates a block of space in the batchbuffer for indirect state.
 *
 * We don't want to allocate separate BOs for every bit of indirect
 * state in the driver.  It means overallocating by a significant
 * margin (4096 bytes, even if the object is just a 20-byte surface
 * state), and more buffers to walk and count for aperture size checking.
 *
 * However, due to the restrictions imposed by the aperture size
 * checking performance hacks, we can't have the batch point at a
 * separate indirect state buffer, because once the batch points at
 * it, no more relocations can be added to it.  So, we sneak these
 * buffers in at the top of the batchbuffer.
 */
void *
brw_state_batch(struct brw_context *brw,
                int size,
                int alignment,
                drm_intel_bo **out_bo,
                uint32_t *out_offset)
{
   struct intel_batchbuffer *batch = brw->intel.batch;
   uint32_t offset;

   assert(size < batch->buf->size);
   offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);

   /* If allocating from the top would wrap below the batchbuffer, or
    * if the batch's used space (plus the reserved pad) collides with our
    * space, then flush and try again.
    */
   if (batch->state_batch_offset < size ||
       offset < batch->ptr - batch->map + batch->reserved_space) {
      intel_batchbuffer_flush(batch);
      offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
   }

   batch->state_batch_offset = offset;

   if (*out_bo != batch->buf) {
      drm_intel_bo_unreference(*out_bo);
      drm_intel_bo_reference(batch->buf);
      *out_bo = batch->buf;
   }

   *out_offset = offset;
   return batch->map + offset;
}
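/* Hedged sketch of a brw_state_batch() caller: carving an 8-dword,
 * 32-byte-aligned block of indirect state out of the top of the batch.
 * The size and alignment values are illustrative, not taken from the
 * driver.
 */
static uint32_t
example_alloc_indirect_state(struct brw_context *brw, drm_intel_bo **out_bo)
{
   uint32_t offset;
   uint32_t *ss = brw_state_batch(brw, 8 * sizeof(uint32_t), 32,
                                  out_bo, &offset);

   memset(ss, 0, 8 * sizeof(uint32_t));   /* fill in state dwords here */

   /* offset is relative to the batch BO; the consuming command should get
    * a relocation pointing at (*out_bo, offset).
    */
   return offset;
}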
struct intel_bo *
intel_bo_ref(struct intel_bo *bo)
{
   if (bo)
      drm_intel_bo_reference(gem_bo(bo));

   return bo;
}
static __DRIimage *
intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
{
   int width, height, offset, stride, dri_format, index;
   struct intel_image_format *f;
   uint32_t mask_x, mask_y;
   __DRIimage *image;

   if (parent == NULL || parent->planar_format == NULL)
      return NULL;

   f = parent->planar_format;

   if (plane >= f->nplanes)
      return NULL;

   width = parent->region->width >> f->planes[plane].width_shift;
   height = parent->region->height >> f->planes[plane].height_shift;
   dri_format = f->planes[plane].dri_format;
   index = f->planes[plane].buffer_index;
   offset = parent->offsets[index];
   stride = parent->strides[index];

   image = intel_allocate_image(dri_format, loaderPrivate);
   if (image == NULL)
      return NULL;

   if (offset + height * stride > parent->region->bo->size) {
      _mesa_warning(NULL, "intel_from_planar: subimage out of bounds");
      free(image);
      return NULL;
   }

   image->region = calloc(sizeof(*image->region), 1);
   if (image->region == NULL) {
      free(image);
      return NULL;
   }

   image->region->cpp = _mesa_get_format_bytes(image->format);
   image->region->width = width;
   image->region->height = height;
   image->region->pitch = stride;
   image->region->refcount = 1;
   image->region->bo = parent->region->bo;
   drm_intel_bo_reference(image->region->bo);
   image->region->tiling = parent->region->tiling;
   image->offset = offset;
   intel_setup_image_from_dimensions(image);

   intel_region_get_tile_masks(image->region, &mask_x, &mask_y, false);
   if (offset & mask_x)
      _mesa_warning(NULL,
                    "intel_from_planar: offset not on tile boundary");

   return image;
}
static void
brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
{
   assert(!fence->batch_bo);
   assert(!fence->signalled);

   brw_emit_mi_flush(brw);
   fence->batch_bo = brw->batch.bo;
   drm_intel_bo_reference(fence->batch_bo);
   intel_batchbuffer_flush(brw);
}
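/* Sketch of the matching wait, assuming libdrm's drm_intel_bo_wait_rendering();
 * the wait helper itself is not shown in this collection.  Because
 * brw_fence_insert() above flushes the batch and pins its BO, idling that BO
 * implies the MI_FLUSH (and everything before it) has completed.
 */
static void
example_fence_finish(struct brw_fence *fence)
{
   if (fence->batch_bo && !fence->signalled) {
      drm_intel_bo_wait_rendering(fence->batch_bo);
      fence->signalled = true;
   }
}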
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct intel_context *intel,
                            drm_intel_bo *bo,
                            mesa_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            int pitch,
                            uint32_t tiling)
{
   struct intel_mipmap_tree *mt;
   struct intel_region *region = calloc(1, sizeof(*region));

   if (!region)
      return NULL;

   /* Nothing will be able to use this miptree with the BO if the offset isn't
    * aligned.
    */
   if (tiling != I915_TILING_NONE)
      assert(offset % 4096 == 0);

   /* miptrees can't handle negative pitch.  If you need flipping of images,
    * that's outside of the scope of the mt.
    */
   assert(pitch >= 0);

   mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format,
                                    0, 0, width, height, 1, true);
   if (!mt) {
      free(region);
      return mt;
   }

   region->cpp = mt->cpp;
   region->width = width;
   region->height = height;
   region->pitch = pitch;
   region->refcount = 1;
   drm_intel_bo_reference(bo);
   region->bo = bo;
   region->tiling = tiling;

   mt->region = region;
   mt->offset = offset;

   return mt;
}
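/* Illustrative caller (not from the tree): wrapping an externally provided
 * BO, e.g. one imported from a dma-buf, in a miptree.  The format, size,
 * pitch, and tiling values are placeholders; the function above only
 * requires a 4096-byte-aligned offset for tiled BOs and a non-negative
 * pitch.
 */
static struct intel_mipmap_tree *
example_wrap_imported_bo(struct intel_context *intel, drm_intel_bo *bo)
{
   return intel_miptree_create_for_bo(intel, bo,
                                      MESA_FORMAT_B8G8R8A8_UNORM,
                                      0,           /* offset */
                                      256, 256,    /* width, height */
                                      1024,        /* pitch */
                                      I915_TILING_X);
}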
struct pipe_fence_handle *
intel_drm_fence_create(drm_intel_bo *bo)
{
   struct intel_drm_fence *fence = CALLOC_STRUCT(intel_drm_fence);

   pipe_reference_init(&fence->reference, 1);
   /* bo is null if fence already expired */
   if (bo) {
      drm_intel_bo_reference(bo);
      fence->bo = bo;
   }

   return (struct pipe_fence_handle *)fence;
}
static void
intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s,
                 GLenum condition, GLbitfield flags)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_sync_object *sync = (struct intel_sync_object *)s;

   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
   intel_batchbuffer_emit_mi_flush(intel->batch);

   sync->bo = intel->batch->buf;
   drm_intel_bo_reference(sync->bo);

   intelFlush(ctx);
}
void
intel_upload_data(struct intel_context *intel,
                  const void *ptr, GLuint size, GLuint align,
                  drm_intel_bo **return_bo,
                  GLuint *return_offset)
{
   GLuint base, delta;

   base = (intel->upload.offset + align - 1) / align * align;
   if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) {
      wrap_buffers(intel, size);
      base = 0;
   }

   drm_intel_bo_reference(intel->upload.bo);
   *return_bo = intel->upload.bo;
   *return_offset = base;

   delta = base - intel->upload.offset;
   if (intel->upload.buffer_len &&
       intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) {
      drm_intel_bo_subdata(intel->upload.bo,
                           intel->upload.buffer_offset,
                           intel->upload.buffer_len,
                           intel->upload.buffer);
      intel->upload.buffer_len = 0;
   }

   if (size < sizeof(intel->upload.buffer)) {
      if (intel->upload.buffer_len == 0)
         intel->upload.buffer_offset = base;
      else
         intel->upload.buffer_len += delta;

      memcpy(intel->upload.buffer + intel->upload.buffer_len, ptr, size);
      intel->upload.buffer_len += size;
   } else {
      drm_intel_bo_subdata(intel->upload.bo, base, size, ptr);
   }

   intel->upload.offset = base + size;
}
static __DRIimage *
intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
{
   int width, height, offset, stride, dri_format, index;
   struct intel_image_format *f;
   __DRIimage *image;

   if (parent == NULL || parent->planar_format == NULL)
      return NULL;

   f = parent->planar_format;

   if (plane >= f->nplanes)
      return NULL;

   width = parent->width >> f->planes[plane].width_shift;
   height = parent->height >> f->planes[plane].height_shift;
   dri_format = f->planes[plane].dri_format;
   index = f->planes[plane].buffer_index;
   offset = parent->offsets[index];
   stride = parent->strides[index];

   image = intel_allocate_image(dri_format, loaderPrivate);
   if (image == NULL)
      return NULL;

   if (offset + height * stride > parent->bo->size) {
      _mesa_warning(NULL, "intel_from_planar: subimage out of bounds");
      free(image);
      return NULL;
   }

   image->bo = parent->bo;
   drm_intel_bo_reference(parent->bo);

   image->width = width;
   image->height = height;
   image->pitch = stride;
   image->offset = offset;

   intel_image_warn_if_unaligned(image, __func__);

   return image;
}
void
intel_upload_unmap(struct intel_context *intel,
                   const void *ptr, GLuint size, GLuint align,
                   drm_intel_bo **return_bo,
                   GLuint *return_offset)
{
   GLuint base;

   base = (intel->upload.offset + align - 1) / align * align;
   if (size > sizeof(intel->upload.buffer)) {
      drm_intel_bo_subdata(intel->upload.bo, base, size, ptr);
      free((void *)ptr);
   }

   drm_intel_bo_reference(intel->upload.bo);
   *return_bo = intel->upload.bo;
   *return_offset = base;

   intel->upload.offset = base + size;
}
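/* Map/unmap pairing as a sketch: intel_upload_unmap() is the second half of
 * an intel_upload_map()/intel_upload_unmap() pair (the map side appears in
 * the older brw_prepare_vertices() later in this section).  The staging
 * size and 64-byte alignment here are illustrative.
 */
static void
example_upload_staged(struct intel_context *intel, const void *src,
                      GLuint size, drm_intel_bo **bo, GLuint *offset)
{
   char *map = intel_upload_map(intel, size, 64);

   memcpy(map, src, size);
   intel_upload_unmap(intel, map, size, 64, bo, offset);
}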
void
intel_upload_data(struct brw_context *brw,
                  const void *ptr, GLuint size, GLuint align,
                  drm_intel_bo **return_bo,
                  GLuint *return_offset)
{
   GLuint base, delta;

   base = (brw->upload.offset + align - 1) / align * align;
   if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) {
      wrap_buffers(brw, size);
      base = 0;
   }

   drm_intel_bo_reference(brw->upload.bo);
   *return_bo = brw->upload.bo;
   *return_offset = base;

   delta = base - brw->upload.offset;
   if (brw->upload.buffer_len &&
       brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) {
      drm_intel_bo_subdata(brw->upload.bo,
                           brw->upload.buffer_offset,
                           brw->upload.buffer_len,
                           brw->upload.buffer);
      brw->upload.buffer_len = 0;
   }

   if (size < sizeof(brw->upload.buffer)) {
      if (brw->upload.buffer_len == 0)
         brw->upload.buffer_offset = base;
      else
         brw->upload.buffer_len += delta;

      memcpy(brw->upload.buffer + brw->upload.buffer_len, ptr, size);
      brw->upload.buffer_len += size;
   } else {
      drm_intel_bo_subdata(brw->upload.bo, base, size, ptr);
   }

   brw->upload.offset = base + size;
}
static __DRIimage *
intel_create_sub_image(__DRIimage *parent,
                       int width, int height, int dri_format,
                       int offset, int pitch, void *loaderPrivate)
{
   __DRIimage *image;
   int cpp;
   uint32_t mask_x, mask_y;

   image = intel_allocate_image(dri_format, loaderPrivate);
   if (image == NULL)
      return NULL;

   cpp = _mesa_get_format_bytes(image->format);
   if (offset + height * cpp * pitch > parent->region->bo->size) {
      _mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds");
      FREE(image);
      return NULL;
   }

   image->region = calloc(sizeof(*image->region), 1);
   if (image->region == NULL) {
      FREE(image);
      return NULL;
   }

   image->region->cpp = _mesa_get_format_bytes(image->format);
   image->region->width = width;
   image->region->height = height;
   image->region->pitch = pitch;
   image->region->refcount = 1;
   image->region->bo = parent->region->bo;
   drm_intel_bo_reference(image->region->bo);
   image->region->tiling = parent->region->tiling;
   image->region->screen = parent->region->screen;
   image->offset = offset;

   intel_region_get_tile_masks(image->region, &mask_x, &mask_y);
   if (offset & mask_x)
      _mesa_warning(NULL,
                    "intel_create_sub_image: offset not on tile boundary");

   return image;
}
/**
 * Sets up a DRIImage structure to point to a slice out of a miptree.
 */
static void
intel_setup_image_from_mipmap_tree(struct brw_context *brw, __DRIimage *image,
                                   struct intel_mipmap_tree *mt, GLuint level,
                                   GLuint zoffset)
{
   intel_miptree_make_shareable(brw, mt);

   intel_miptree_check_level_layer(mt, level, zoffset);

   image->width = minify(mt->physical_width0, level - mt->first_level);
   image->height = minify(mt->physical_height0, level - mt->first_level);
   image->pitch = mt->pitch;

   image->offset = intel_miptree_get_tile_offsets(mt, level, zoffset,
                                                  &image->tile_x,
                                                  &image->tile_y);

   drm_intel_bo_unreference(image->bo);
   image->bo = mt->bo;
   drm_intel_bo_reference(mt->bo);
}
static __DRIimage *
intel_create_image_from_renderbuffer(__DRIcontext *context,
                                     int renderbuffer, void *loaderPrivate)
{
   __DRIimage *image;
   struct brw_context *brw = context->driverPrivate;
   struct gl_context *ctx = &brw->ctx;
   struct gl_renderbuffer *rb;
   struct intel_renderbuffer *irb;

   rb = _mesa_lookup_renderbuffer(ctx, renderbuffer);
   if (!rb) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glRenderbufferExternalMESA");
      return NULL;
   }

   irb = intel_renderbuffer(rb);
   intel_miptree_make_shareable(brw, irb->mt);
   image = calloc(1, sizeof *image);
   if (image == NULL)
      return NULL;

   image->internal_format = rb->InternalFormat;
   image->format = rb->Format;
   image->offset = 0;
   image->data = loaderPrivate;
   drm_intel_bo_unreference(image->bo);
   image->bo = irb->mt->bo;
   drm_intel_bo_reference(irb->mt->bo);
   image->width = rb->Width;
   image->height = rb->Height;
   image->pitch = irb->mt->pitch;
   image->dri_format = driGLFormatToImageFormat(image->format);
   image->has_depthstencil = irb->mt->stencil_mt ? true : false;

   rb->NeedsFinishRenderTexture = true;
   return image;
}
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size,
                                      RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      brw->draw.gl_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;

      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:
      /* Note that before the loop, brw->ctx.NewDriverState was set to != 0,
       * and that the state updated in the loop outside of this block is that
       * in *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
         brw->no_batch_wrap = true;
         brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
         }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}
/* Recalculate all state from scratch. Perhaps not the most * efficient, but this has gotten complex enough that we need * something which is understandable and reliable. */ static bool i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { struct gl_context *ctx = &intel->ctx; struct i830_context *i830 = i830_context(ctx); struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = tUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); GLuint *state = i830->state.Tex[unit], format, pitch; GLint lodbias; GLubyte border[4]; GLuint dst_x, dst_y; memset(state, 0, sizeof(*state)); /*We need to refcount these. */ if (i830->state.tex_buffer[unit] != NULL) { drm_intel_bo_unreference(i830->state.tex_buffer[unit]); i830->state.tex_buffer[unit] = NULL; } if (!intel_finalize_mipmap_tree(intel, unit)) return false; /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ firstImage = tObj->Image[0][tObj->BaseLevel]; intel_miptree_get_image_offset(intelObj->mt, tObj->BaseLevel, 0, &dst_x, &dst_y); drm_intel_bo_reference(intelObj->mt->region->bo); i830->state.tex_buffer[unit] = intelObj->mt->region->bo; pitch = intelObj->mt->region->pitch; /* XXX: This calculation is probably broken for tiled images with * a non-page-aligned offset. */ i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch; format = translate_texture_format(firstImage->TexFormat); state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | (LOAD_TEXTURE_MAP0 << unit) | 4); state[I830_TEXREG_TM0S1] = (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) | ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format); if (intelObj->mt->region->tiling != I915_TILING_NONE) { state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE; if (intelObj->mt->region->tiling == I915_TILING_Y) state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK; } state[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); { if (tObj->Target == GL_TEXTURE_CUBE_MAP) state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) | CUBE_NEGX_ENABLE | CUBE_POSX_ENABLE | CUBE_NEGY_ENABLE | CUBE_POSY_ENABLE | CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE); else state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit)); } { GLuint minFilt, mipFilt, magFilt; float maxlod; uint32_t minlod_fixed, maxlod_fixed; switch (sampler->MinFilter) { case GL_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NONE; break; case GL_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NONE; break; case GL_NEAREST_MIPMAP_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NEAREST; break; case GL_LINEAR_MIPMAP_NEAREST: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NEAREST; break; case GL_NEAREST_MIPMAP_LINEAR: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_LINEAR; break; case GL_LINEAR_MIPMAP_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_LINEAR; break; default: return false; } if (sampler->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; } else { switch (sampler->MagFilter) { case GL_NEAREST: magFilt = FILTER_NEAREST; break; case GL_LINEAR: magFilt = FILTER_LINEAR; break; default: return false; } } lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0); if (lodbias < -64) lodbias = -64; if (lodbias > 63) lodbias = 63; state[I830_TEXREG_TM0S3] = ((lodbias << 
TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK); #if 0 /* YUV conversion: */ if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR || firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV) state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION; #endif /* We get one field with fraction bits for the maximum * addressable (smallest resolution) LOD. Use it to cover both * MAX_LEVEL and MAX_LOD. */ minlod_fixed = U_FIXED(CLAMP(sampler->MinLod, 0.0, 11), 4); maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM || intel->intelScreen->deviceID == PCI_CHIP_I865_G) { maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2); maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 3) >> 2); state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT; state[I830_TEXREG_TM0S2] |= TM0S2_LOD_PRECLAMP; } else {
void
intel_bo_reference(struct intel_bo *bo)
{
   drm_intel_bo_reference(gem_bo(bo));
}
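/* Sketch of the intended discipline around intel_bo_reference(): every
 * reference taken must be balanced by an unreference once the holding
 * pointer is dropped.  The intel_bo_unreference() name and the slot-based
 * caller are assumptions for illustration; only intel_bo_reference() above
 * is given, and note that (unlike intel_bo_ref()) it does not tolerate a
 * NULL bo.
 */
static void
example_retarget(struct intel_bo **slot, struct intel_bo *bo)
{
   intel_bo_reference(bo);          /* take the new reference first */
   if (*slot)
      intel_bo_unreference(*slot);  /* then drop the old one */
   *slot = bo;
}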
static void
brw_upload_indices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   drm_intel_bo *old_bo = brw->ib.bo;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = _mesa_sizeof_type(index_buffer->type);
   ib_size = ib_type_size * index_buffer->count;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!_mesa_is_bufferobj(bufferobj)) {
      /* Get new bufferobj, offset:
       */
      intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
                        &brw->ib.bo, &offset);
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
      if ((ib_type_size - 1) & offset) {
         perf_debug("copying index buffer to a temporary to work around "
                    "misaligned offset %d\n", offset);

         GLubyte *map = ctx->Driver.MapBufferRange(ctx, offset, ib_size,
                                                   GL_MAP_READ_BIT,
                                                   bufferobj, MAP_INTERNAL);

         intel_upload_data(brw, map, ib_size, ib_type_size,
                           &brw->ib.bo, &offset);

         ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL);
      } else {
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
                                   offset, ib_size);
         if (bo != brw->ib.bo) {
            drm_intel_bo_unreference(brw->ib.bo);
            brw->ib.bo = bo;
            drm_intel_bo_reference(bo);
         }
      }
   }

   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
    * the index buffer state when we're just moving the start index
    * of our drawing.
    */
   brw->ib.start_vertex_offset = offset / ib_type_size;

   if (brw->ib.bo != old_bo)
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;

   if (index_buffer->type != brw->ib.type) {
      brw->ib.type = index_buffer->type;
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   }
}
void brw_prepare_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* CACHE_NEW_VS_PROG */ GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read; const unsigned char *ptr = NULL; GLuint interleaved = 0; unsigned int min_index = brw->vb.min_index + brw->basevertex; unsigned int max_index = brw->vb.max_index + brw->basevertex; int delta, i, j; struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; /* _NEW_POLYGON * * On gen6+, edge flags don't end up in the VUE (either in or out of the * VS). Instead, they're uploaded as the last vertex element, and the data * is passed sideband through the fixed function units. So, we need to * prepare the vertex buffer for it, but it's not present in inputs_read. */ if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL)) { vs_inputs |= VERT_BIT_EDGEFLAG; } if (0) fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; while (vs_inputs) { GLuint i = ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; vs_inputs &= ~BITFIELD64_BIT(i); brw->vb.enabled[brw->vb.nr_enabled++] = input; } if (brw->vb.nr_enabled == 0) return; if (brw->vb.nr_buffers) return; for (i = j = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; const struct gl_client_array *glarray = input->glarray; if (_mesa_is_bufferobj(glarray->BufferObj)) { struct intel_buffer_object *intel_buffer = intel_buffer_object(glarray->BufferObj); int k; /* If we have a VB set to be uploaded for this buffer object * already, reuse that VB state so that we emit fewer * relocations. */ for (k = 0; k < i; k++) { const struct gl_client_array *other = brw->vb.enabled[k]->glarray; if (glarray->BufferObj == other->BufferObj && glarray->StrideB == other->StrideB && glarray->InstanceDivisor == other->InstanceDivisor && (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB) { input->buffer = brw->vb.enabled[k]->buffer; input->offset = glarray->Ptr - other->Ptr; break; } } if (k == i) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; /* Named buffer object: Just reference its contents directly. */ buffer->offset = (uintptr_t)glarray->Ptr; buffer->stride = glarray->StrideB; buffer->step_rate = glarray->InstanceDivisor; uint32_t offset, size; if (glarray->InstanceDivisor) { offset = buffer->offset; size = (buffer->stride * ((brw->num_instances / glarray->InstanceDivisor) - 1) + glarray->_ElementSize); } else { if (min_index == -1) { offset = 0; size = intel_buffer->Base.Size; } else { offset = buffer->offset + min_index * buffer->stride; size = (buffer->stride * (max_index - min_index) + glarray->_ElementSize); } } buffer->bo = intel_bufferobj_buffer(brw, intel_buffer, offset, size); drm_intel_bo_reference(buffer->bo); input->buffer = j++; input->offset = 0; } /* This is a common place to reach if the user mistakenly supplies * a pointer in place of a VBO offset. If we just let it go through, * we may end up dereferencing a pointer beyond the bounds of the * GTT. We would hope that the VBO's max_index would save us, but * Mesa appears to hand us min/max values not clipped to the * array object's _MaxElement, and _MaxElement frequently appears * to be wrong anyway. * * The VBO spec allows application termination in this case, and it's * probably a service to the poor programmer to do so rather than * trying to just not render. 
*/ assert(input->offset < brw->vb.buffers[input->buffer].bo->size); } else { /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ if (nr_uploads == 0) { interleaved = glarray->StrideB; ptr = glarray->Ptr; } else if (interleaved != glarray->StrideB || glarray->Ptr < ptr || (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved) { /* If our stride is different from the first attribute's stride, * or if the first attribute's stride didn't cover our element, * disable the interleaved upload optimization. The second case * can most commonly occur in cases where there is a single vertex * and, for example, the data is stored on the application's * stack. * * NOTE: This will also disable the optimization in cases where * the data is in a different order than the array indices. * Something like: * * float data[...]; * glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]); * glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]); */ interleaved = 0; } upload[nr_uploads++] = input; } } /* If we need to upload all the arrays, then we can trim those arrays to * only the used elements [min_index, max_index] so long as we adjust all * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias. */ brw->vb.start_vertex_bias = 0; delta = min_index; if (nr_uploads == brw->vb.nr_enabled) { brw->vb.start_vertex_bias = -delta; delta = 0; } /* Handle any arrays to be uploaded. */ if (nr_uploads > 1) { if (interleaved) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; /* All uploads are interleaved, so upload the arrays together as * interleaved. First, upload the contents and set up upload[0]. */ copy_array_to_vbo_array(brw, upload[0], min_index, max_index, buffer, interleaved); buffer->offset -= delta * interleaved; for (i = 0; i < nr_uploads; i++) { /* Then, just point upload[i] at upload[0]'s buffer. */ upload[i]->offset = ((const unsigned char *)upload[i]->glarray->Ptr - ptr); upload[i]->buffer = j; } j++; nr_uploads = 0; } } /* Upload non-interleaved arrays */ for (i = 0; i < nr_uploads; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; if (upload[i]->glarray->InstanceDivisor == 0) { copy_array_to_vbo_array(brw, upload[i], min_index, max_index, buffer, upload[i]->glarray->_ElementSize); } else { /* This is an instanced attribute, since its InstanceDivisor * is not zero. Therefore, its data will be stepped after the * instanced draw has been run InstanceDivisor times. */ uint32_t instanced_attr_max_index = (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor; copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, buffer, upload[i]->glarray->_ElementSize); } buffer->offset -= delta * buffer->stride; buffer->step_rate = upload[i]->glarray->InstanceDivisor; upload[i]->buffer = j++; upload[i]->offset = 0; } brw->vb.nr_buffers = j; }
static void
brw_upload_indices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   drm_intel_bo *bo = NULL;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = get_size(index_buffer->type);
   ib_size = ib_type_size * index_buffer->count;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!_mesa_is_bufferobj(bufferobj)) {
      /* Get new bufferobj, offset:
       */
      intel_upload_data(&brw->intel, index_buffer->ptr, ib_size, ib_type_size,
                        &bo, &offset);
      brw->ib.start_vertex_offset = offset / ib_type_size;
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
      if ((get_size(index_buffer->type) - 1) & offset) {
         GLubyte *map = ctx->Driver.MapBufferRange(ctx, offset, ib_size,
                                                   GL_MAP_WRITE_BIT,
                                                   bufferobj);

         intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
                           &bo, &offset);
         brw->ib.start_vertex_offset = offset / ib_type_size;

         ctx->Driver.UnmapBuffer(ctx, bufferobj);
      } else {
         /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
          * the index buffer state when we're just moving the start index
          * of our drawing.
          */
         brw->ib.start_vertex_offset = offset / ib_type_size;

         bo = intel_bufferobj_source(intel,
                                     intel_buffer_object(bufferobj),
                                     ib_type_size, &offset);
         drm_intel_bo_reference(bo);

         brw->ib.start_vertex_offset += offset / ib_type_size;
      }
   }

   if (brw->ib.bo != bo) {
      drm_intel_bo_unreference(brw->ib.bo);
      brw->ib.bo = bo;

      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   } else {
      drm_intel_bo_unreference(bo);
   }

   if (index_buffer->type != brw->ib.type) {
      brw->ib.type = index_buffer->type;
      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
   }
}
/* Recalculate all state from scratch. Perhaps not the most * efficient, but this has gotten complex enough that we need * something which is understandable and reliable. */ static GLboolean i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { struct gl_context *ctx = &intel->ctx; struct i915_context *i915 = i915_context(ctx); struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = tUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; GLuint *state = i915->state.Tex[unit], format, pitch; GLint lodbias, aniso = 0; GLubyte border[4]; GLfloat maxlod; memset(state, 0, sizeof(state)); /*We need to refcount these. */ if (i915->state.tex_buffer[unit] != NULL) { drm_intel_bo_unreference(i915->state.tex_buffer[unit]); i915->state.tex_buffer[unit] = NULL; } if (!intel_finalize_mipmap_tree(intel, unit)) return GL_FALSE; /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ firstImage = tObj->Image[0][intelObj->firstLevel]; drm_intel_bo_reference(intelObj->mt->region->buffer); i915->state.tex_buffer[unit] = intelObj->mt->region->buffer; i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */ format = translate_texture_format(firstImage->TexFormat, firstImage->InternalFormat, tObj->DepthMode); pitch = intelObj->mt->region->pitch * intelObj->mt->cpp; state[I915_TEXREG_MS3] = (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) | ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format); if (intelObj->mt->region->tiling != I915_TILING_NONE) { state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE; if (intelObj->mt->region->tiling == I915_TILING_Y) state[I915_TEXREG_MS3] |= MS3_TILE_WALK; } /* We get one field with fraction bits for the maximum addressable * (lowest resolution) LOD. Use it to cover both MAX_LEVEL and * MAX_LOD. 
*/ maxlod = MIN2(tObj->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | (U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) | ((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); { GLuint minFilt, mipFilt, magFilt; switch (tObj->MinFilter) { case GL_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NONE; break; case GL_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NONE; break; case GL_NEAREST_MIPMAP_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NEAREST; break; case GL_LINEAR_MIPMAP_NEAREST: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NEAREST; break; case GL_NEAREST_MIPMAP_LINEAR: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_LINEAR; break; case GL_LINEAR_MIPMAP_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_LINEAR; break; default: return GL_FALSE; } if (tObj->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; if (tObj->MaxAnisotropy > 2.0) aniso = SS2_MAX_ANISO_4; else aniso = SS2_MAX_ANISO_2; } else { switch (tObj->MagFilter) { case GL_NEAREST: magFilt = FILTER_NEAREST; break; case GL_LINEAR: magFilt = FILTER_LINEAR; break; default: return GL_FALSE; } } lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0); if (lodbias < -256) lodbias = -256; if (lodbias > 255) lodbias = 255; state[I915_TEXREG_SS2] = ((lodbias << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); /* YUV conversion: */ if (firstImage->TexFormat == MESA_FORMAT_YCBCR || firstImage->TexFormat == MESA_FORMAT_YCBCR_REV) state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION; /* Shadow: */ if (tObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && tObj->Target != GL_TEXTURE_3D) { if (tObj->Target == GL_TEXTURE_1D) return GL_FALSE; state[I915_TEXREG_SS2] |= (SS2_SHADOW_ENABLE | intel_translate_shadow_compare_func(tObj->CompareFunc)); minFilt = FILTER_4X4_FLAT; magFilt = FILTER_4X4_FLAT; } state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | (mipFilt << SS2_MIP_FILTER_SHIFT) | (magFilt << SS2_MAG_FILTER_SHIFT) | aniso); } { GLenum ws = tObj->WrapS; GLenum wt = tObj->WrapT; GLenum wr = tObj->WrapR; float minlod; /* We program 1D textures as 2D textures, so the 2D texcoord could * result in sampling border values if we don't set the T wrap to * repeat. */ if (tObj->Target == GL_TEXTURE_1D) wt = GL_REPEAT; /* 3D textures don't seem to respect the border color. * Fallback if there's ever a danger that they might refer to * it. * * Effectively this means fallback on 3D clamp or * clamp_to_border. 
*/ if (tObj->Target == GL_TEXTURE_3D && (tObj->MinFilter != GL_NEAREST || tObj->MagFilter != GL_NEAREST) && (ws == GL_CLAMP || wt == GL_CLAMP || wr == GL_CLAMP || ws == GL_CLAMP_TO_BORDER || wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER)) return GL_FALSE; /* Only support TEXCOORDMODE_CLAMP_EDGE and TEXCOORDMODE_CUBE (not * used) when using cube map texture coordinates */ if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB && (((ws != GL_CLAMP) && (ws != GL_CLAMP_TO_EDGE)) || ((wt != GL_CLAMP) && (wt != GL_CLAMP_TO_EDGE)))) return GL_FALSE; state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */ state[I915_TEXREG_SS3] |= ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); minlod = MIN2(tObj->MinLod, tObj->_MaxLevel - tObj->BaseLevel); state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(minlod, 0.0, 11.0), 4) << SS3_MIN_LOD_SHIFT); } /* convert border color from float to ubyte */ CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]); CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]); CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]); CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the channels * for safety. */ state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[0], border[0], border[0], border[0]); } else { state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[3], border[0], border[1], border[2]); } I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE); /* memcmp was already disabled, but definitely won't work as the * region might now change and that wouldn't be detected: */ I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); #if 0 DBG(TEXTURE, "state[I915_TEXREG_SS2] = 0x%x\n", state[I915_TEXREG_SS2]); DBG(TEXTURE, "state[I915_TEXREG_SS3] = 0x%x\n", state[I915_TEXREG_SS3]); DBG(TEXTURE, "state[I915_TEXREG_SS4] = 0x%x\n", state[I915_TEXREG_SS4]); DBG(TEXTURE, "state[I915_TEXREG_MS2] = 0x%x\n", state[I915_TEXREG_MS2]); DBG(TEXTURE, "state[I915_TEXREG_MS3] = 0x%x\n", state[I915_TEXREG_MS3]); DBG(TEXTURE, "state[I915_TEXREG_MS4] = 0x%x\n", state[I915_TEXREG_MS4]); #endif return GL_TRUE; }
/* Recalculate all state from scratch. Perhaps not the most * efficient, but this has gotten complex enough that we need * something which is understandable and reliable. */ static GLboolean i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { GLcontext *ctx = &intel->ctx; struct i830_context *i830 = i830_context(ctx); struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = tUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; GLuint *state = i830->state.Tex[unit], format, pitch; GLint lodbias; GLubyte border[4]; GLuint dst_x, dst_y; memset(state, 0, sizeof(state)); /*We need to refcount these. */ if (i830->state.tex_buffer[unit] != NULL) { drm_intel_bo_unreference(i830->state.tex_buffer[unit]); i830->state.tex_buffer[unit] = NULL; } if (!intel_finalize_mipmap_tree(intel, unit)) return GL_FALSE; /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ firstImage = tObj->Image[0][intelObj->firstLevel]; intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0, &dst_x, &dst_y); drm_intel_bo_reference(intelObj->mt->region->buffer); i830->state.tex_buffer[unit] = intelObj->mt->region->buffer; pitch = intelObj->mt->region->pitch * intelObj->mt->cpp; /* XXX: This calculation is probably broken for tiled images with * a non-page-aligned offset. */ i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch; format = translate_texture_format(firstImage->TexFormat, firstImage->InternalFormat); state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | (LOAD_TEXTURE_MAP0 << unit) | 4); state[I830_TEXREG_TM0S1] = (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) | ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format); if (intelObj->mt->region->tiling != I915_TILING_NONE) { state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE; if (intelObj->mt->region->tiling == I915_TILING_Y) state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK; } state[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); { if (tObj->Target == GL_TEXTURE_CUBE_MAP) state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) | CUBE_NEGX_ENABLE | CUBE_POSX_ENABLE | CUBE_NEGY_ENABLE | CUBE_POSY_ENABLE | CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE); else state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit)); } { GLuint minFilt, mipFilt, magFilt; switch (tObj->MinFilter) { case GL_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NONE; break; case GL_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NONE; break; case GL_NEAREST_MIPMAP_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NEAREST; break; case GL_LINEAR_MIPMAP_NEAREST: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NEAREST; break; case GL_NEAREST_MIPMAP_LINEAR: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_LINEAR; break; case GL_LINEAR_MIPMAP_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_LINEAR; break; default: return GL_FALSE; } if (tObj->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; } else { switch (tObj->MagFilter) { case GL_NEAREST: magFilt = FILTER_NEAREST; break; case GL_LINEAR: magFilt = FILTER_LINEAR; break; default: return GL_FALSE; } } lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0); if (lodbias < -64) lodbias = -64; if (lodbias > 63) lodbias = 63; state[I830_TEXREG_TM0S3] = ((lodbias << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK); #if 0 /* YUV conversion: */ 
if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR || firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV) state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION; #endif state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel - intelObj->firstLevel) * 4) << TM0S3_MIN_MIP_SHIFT; state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) | (mipFilt << TM0S3_MIP_FILTER_SHIFT) | (magFilt << TM0S3_MAG_FILTER_SHIFT)); } { GLenum ws = tObj->WrapS; GLenum wt = tObj->WrapT; /* 3D textures not available on i830 */ if (tObj->Target == GL_TEXTURE_3D) return GL_FALSE; state[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD | MAP_UNIT(unit) | ENABLE_TEXCOORD_PARAMS | ss3 | ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(translate_wrap_mode(wt)) | ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(translate_wrap_mode (ws))); } /* convert border color from float to ubyte */ CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]); CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]); CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]); CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]); state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3], border[0], border[1], border[2]); I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE); /* memcmp was already disabled, but definitely won't work as the * region might now change and that wouldn't be detected: */ I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); return GL_TRUE; }
/* Recalculate all state from scratch. Perhaps not the most * efficient, but this has gotten complex enough that we need * something which is understandable and reliable. */ static bool i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { struct gl_context *ctx = &intel->ctx; struct i915_context *i915 = i915_context(ctx); struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *tObj = tUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); GLuint *state = i915->state.Tex[unit], format, pitch; GLint lodbias, aniso = 0; GLubyte border[4]; GLfloat maxlod; memset(state, 0, sizeof(state)); /*We need to refcount these. */ if (i915->state.tex_buffer[unit] != NULL) { drm_intel_bo_unreference(i915->state.tex_buffer[unit]); i915->state.tex_buffer[unit] = NULL; } if (!intel_finalize_mipmap_tree(intel, unit)) return false; /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ firstImage = tObj->Image[0][tObj->BaseLevel]; drm_intel_bo_reference(intelObj->mt->region->bo); i915->state.tex_buffer[unit] = intelObj->mt->region->bo; i915->state.tex_offset[unit] = intelObj->mt->offset; format = translate_texture_format(firstImage->TexFormat, tObj->DepthMode); pitch = intelObj->mt->region->pitch * intelObj->mt->cpp; state[I915_TEXREG_MS3] = (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) | ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format); if (intelObj->mt->region->tiling != I915_TILING_NONE) { state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE; if (intelObj->mt->region->tiling == I915_TILING_Y) state[I915_TEXREG_MS3] |= MS3_TILE_WALK; } /* We get one field with fraction bits for the maximum addressable * (lowest resolution) LOD. Use it to cover both MAX_LEVEL and * MAX_LOD. 
*/ maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | (U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) | ((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); { GLuint minFilt, mipFilt, magFilt; switch (sampler->MinFilter) { case GL_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NONE; break; case GL_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NONE; break; case GL_NEAREST_MIPMAP_NEAREST: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_NEAREST; break; case GL_LINEAR_MIPMAP_NEAREST: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_NEAREST; break; case GL_NEAREST_MIPMAP_LINEAR: minFilt = FILTER_NEAREST; mipFilt = MIPFILTER_LINEAR; break; case GL_LINEAR_MIPMAP_LINEAR: minFilt = FILTER_LINEAR; mipFilt = MIPFILTER_LINEAR; break; default: return false; } if (sampler->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; if (sampler->MaxAnisotropy > 2.0) aniso = SS2_MAX_ANISO_4; else aniso = SS2_MAX_ANISO_2; } else { switch (sampler->MagFilter) { case GL_NEAREST: magFilt = FILTER_NEAREST; break; case GL_LINEAR: magFilt = FILTER_LINEAR; break; default: return false; } } lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0); if (lodbias < -256) lodbias = -256; if (lodbias > 255) lodbias = 255; state[I915_TEXREG_SS2] = ((lodbias << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); /* YUV conversion: */ if (firstImage->TexFormat == MESA_FORMAT_YCBCR || firstImage->TexFormat == MESA_FORMAT_YCBCR_REV) state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION; /* Shadow: */ if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && tObj->Target != GL_TEXTURE_3D) { if (tObj->Target == GL_TEXTURE_1D) return false; state[I915_TEXREG_SS2] |= (SS2_SHADOW_ENABLE | intel_translate_shadow_compare_func(sampler->CompareFunc)); minFilt = FILTER_4X4_FLAT; magFilt = FILTER_4X4_FLAT; } state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | (mipFilt << SS2_MIP_FILTER_SHIFT) | (magFilt << SS2_MAG_FILTER_SHIFT) | aniso); } { GLenum ws = sampler->WrapS; GLenum wt = sampler->WrapT; GLenum wr = sampler->WrapR; float minlod; /* We program 1D textures as 2D textures, so the 2D texcoord could * result in sampling border values if we don't set the T wrap to * repeat. */ if (tObj->Target == GL_TEXTURE_1D) wt = GL_REPEAT; /* 3D textures don't seem to respect the border color. * Fallback if there's ever a danger that they might refer to * it. * * Effectively this means fallback on 3D clamp or * clamp_to_border. */ if (tObj->Target == GL_TEXTURE_3D && (sampler->MinFilter != GL_NEAREST || sampler->MagFilter != GL_NEAREST) && (ws == GL_CLAMP || wt == GL_CLAMP || wr == GL_CLAMP || ws == GL_CLAMP_TO_BORDER || wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER)) return false; /* Only support TEXCOORDMODE_CLAMP_EDGE and TEXCOORDMODE_CUBE (not * used) when using cube map texture coordinates */ if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB && (((ws != GL_CLAMP) && (ws != GL_CLAMP_TO_EDGE)) || ((wt != GL_CLAMP) && (wt != GL_CLAMP_TO_EDGE)))) return false; /* * According to 3DSTATE_MAP_STATE at page of 104 in Bspec * Vol3d 3D Instructions: * [DevGDG and DevAlv]: Must be a power of 2 for cube maps. * [DevLPT, DevCST and DevBLB]: If not a power of 2, cube maps * must have all faces enabled. * * But, as I tested on pineview(DevBLB derived), the rendering is * bad(you will find the color isn't samplered right in some * fragments). 
After checking, it seems that the texture layout is * wrong: making the width and height align of 4(although this * doesn't make much sense) will fix this issue and also broke some * others. Well, Bspec mentioned nothing about the layout alignment * and layout for NPOT cube map. I guess the Bspec just assume it's * a POT cube map. * * Thus, I guess we need do this for other platforms as well. */ if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB && !is_power_of_two(firstImage->Height)) return false; state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */ state[I915_TEXREG_SS3] |= ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); minlod = MIN2(sampler->MinLod, tObj->_MaxLevel - tObj->BaseLevel); state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(minlod, 0.0, 11.0), 4) << SS3_MIN_LOD_SHIFT); } /* convert border color from float to ubyte */ CLAMPED_FLOAT_TO_UBYTE(border[0], sampler->BorderColor.f[0]); CLAMPED_FLOAT_TO_UBYTE(border[1], sampler->BorderColor.f[1]); CLAMPED_FLOAT_TO_UBYTE(border[2], sampler->BorderColor.f[2]); CLAMPED_FLOAT_TO_UBYTE(border[3], sampler->BorderColor.f[3]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the channels * for safety. */ state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[0], border[0], border[0], border[0]); } else { state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[3], border[0], border[1], border[2]); } I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), true); /* memcmp was already disabled, but definitely won't work as the * region might now change and that wouldn't be detected: */ I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); #if 0 DBG(TEXTURE, "state[I915_TEXREG_SS2] = 0x%x\n", state[I915_TEXREG_SS2]); DBG(TEXTURE, "state[I915_TEXREG_SS3] = 0x%x\n", state[I915_TEXREG_SS3]); DBG(TEXTURE, "state[I915_TEXREG_SS4] = 0x%x\n", state[I915_TEXREG_SS4]); DBG(TEXTURE, "state[I915_TEXREG_MS2] = 0x%x\n", state[I915_TEXREG_MS2]); DBG(TEXTURE, "state[I915_TEXREG_MS3] = 0x%x\n", state[I915_TEXREG_MS3]); DBG(TEXTURE, "state[I915_TEXREG_MS4] = 0x%x\n", state[I915_TEXREG_MS4]); #endif return true; }
static void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   /* CACHE_NEW_VS_PROG */
   GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0, total_size = 0;
   unsigned int min_index = brw->vb.min_index;
   unsigned int max_index = brw->vb.max_index;
   int delta, i, j;
   GLboolean can_merge_uploads = GL_TRUE;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* First build an array of pointers to ve's in vb.inputs_read */
   if (0)
      printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint i = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[i];

      vs_inputs &= ~BITFIELD64_BIT(i);
      if (input->glarray->Size && get_size(input->glarray->Type))
         brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      goto prepare;

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;
      int type_size = get_size(glarray->Type);

      input->element_size = type_size * glarray->Size;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);
         int k;

         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->bo = intel_bufferobj_source(intel,
                                                intel_buffer, type_size,
                                                &buffer->offset);
            drm_intel_bo_reference(buffer->bo);
            buffer->offset += (uintptr_t)glarray->Ptr;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;

            input->buffer = j++;
            input->offset = 0;
         }

         /* This is a common place to reach if the user mistakenly supplies
          * a pointer in place of a VBO offset.  If we just let it go through,
          * we may end up dereferencing a pointer beyond the bounds of the
          * GTT.  We would hope that the VBO's max_index would save us, but
          * Mesa appears to hand us min/max values not clipped to the
          * array object's _MaxElement, and _MaxElement frequently appears
          * to be wrong anyway.
          *
          * The VBO spec allows application termination in this case, and it's
          * probably a service to the poor programmer to do so rather than
          * trying to just not render.
          */
         assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            /* Position array not properly enabled:
             */
            if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) {
               intel->Fallback = true; /* boolean, not bitfield */
               return;
            }

            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  (uintptr_t)(glarray->Ptr - ptr) > interleaved)
         {
            interleaved = 0;
         }
         else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size - 1))
         {
            /* enforce natural alignment (for doubles) */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;

         total_size = ALIGN(total_size, type_size);
         total_size += input->element_size;

         if (glarray->InstanceDivisor != 0) {
            can_merge_uploads = GL_FALSE;
         }
      }
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE, i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }
   if (delta && !brw->intel.intelScreen->relaxed_relocations)
      min_index = delta = 0;

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved && interleaved <= 2 * total_size) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
      else if ((total_size < 2048) && can_merge_uploads) {
         /* Upload non-interleaved arrays into a single interleaved array */
         struct brw_vertex_buffer *buffer;
         int count = MAX2(max_index - min_index + 1, 1);
         int offset;
         char *map;

         map = intel_upload_map(&brw->intel, total_size * count, total_size);
         for (i = offset = 0; i < nr_uploads; i++) {
            const unsigned char *src = upload[i]->glarray->Ptr;
            int size = upload[i]->element_size;
            int stride = upload[i]->glarray->StrideB;
            char *dst;
            int n;

            offset = ALIGN(offset, get_size(upload[i]->glarray->Type));
            dst = map + offset;
            src += min_index * stride;

            for (n = 0; n < count; n++) {
               memcpy(dst, src, size);
               src += stride;
               dst += total_size;
            }

            upload[i]->offset = offset;
            upload[i]->buffer = j;

            offset += size;
         }
         assert(offset == total_size);
         buffer = &brw->vb.buffers[j++];
         intel_upload_unmap(&brw->intel, map, offset * count, offset,
                            &buffer->bo, &buffer->offset);
         buffer->stride = offset;
         buffer->step_rate = 0;
         buffer->offset -= delta * offset;

         nr_uploads = 0;
      }
   }

   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->element_size);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->element_size);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   /* can we simply extend the current vb? */
   if (j == brw->vb.nr_current_buffers) {
      int delta = 0;
      for (i = 0; i < j; i++) {
         int d;

         if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
             brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
             brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
            break;

         d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
         if (d < 0)
            break;

         if (i == 0)
            delta = d / brw->vb.current_buffers[i].stride;
         if (delta * brw->vb.current_buffers[i].stride != d)
            break;
      }
      if (i == j) {
         brw->vb.start_vertex_bias += delta;
         while (--j >= 0)
            drm_intel_bo_unreference(brw->vb.buffers[j].bo);
         j = 0;
      }
   }

   brw->vb.nr_buffers = j;

prepare:
   brw_prepare_query_begin(brw);
}
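/* Illustrative sketch (hypothetical application code, not driver code) of a
 * layout the first pass above classifies as interleaved: both client arrays
 * share a stride, the second pointer lies within one stride of the first,
 * and each element is naturally aligned for its type, so the merged path
 * uploads everything with a single copy_array_to_vbo_array() call.  The
 * struct and function names here are invented for the example.
 */
#include <GL/gl.h>

struct example_vertex {
   GLfloat position[3];   /* offset 0 within the 28-byte stride */
   GLfloat color[4];      /* offset 12, naturally float-aligned */
};

static void
example_interleaved_client_arrays(const struct example_vertex *verts)
{
   /* No GL_ARRAY_BUFFER bound: glarray->Ptr is a real client pointer,
    * so brw_prepare_vertices() must stream the data into an upload BO.
    */
   glEnableClientState(GL_VERTEX_ARRAY);
   glEnableClientState(GL_COLOR_ARRAY);
   glVertexPointer(3, GL_FLOAT, sizeof(*verts), verts[0].position);
   glColorPointer(4, GL_FLOAT, sizeof(*verts), verts[0].color);
}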
void
brw_prepare_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
   const unsigned char *ptr = NULL;
   GLuint interleaved = 0;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   unsigned i;
   int delta, j;

   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
   GLuint nr_uploads = 0;

   /* _NEW_POLYGON
    *
    * On gen6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the
    * data is passed sideband through the fixed function units.  So, we need
    * to prepare the vertex buffer for it, but it's not present in
    * inputs_read.
    */
   if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;
   while (vs_inputs) {
      GLuint index = ffsll(vs_inputs) - 1;
      struct brw_vertex_element *input = &brw->vb.inputs[index];

      vs_inputs &= ~BITFIELD64_BIT(index);
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   /* The range of data in a given buffer represented as [min, max) */
   struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
   uint32_t buffer_range_start[VERT_ATTRIB_MAX];
   uint32_t buffer_range_end[VERT_ATTRIB_MAX];

   for (i = j = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct gl_client_array *glarray = input->glarray;

      if (_mesa_is_bufferobj(glarray->BufferObj)) {
         struct intel_buffer_object *intel_buffer =
            intel_buffer_object(glarray->BufferObj);

         const uint32_t offset = (uintptr_t)glarray->Ptr;

         /* Start with the worst case */
         uint32_t start = 0;
         uint32_t range = intel_buffer->Base.Size;
         if (glarray->InstanceDivisor) {
            if (brw->num_instances) {
               start = offset + glarray->StrideB * brw->baseinstance;
               range = (glarray->StrideB * ((brw->num_instances - 1) /
                                            glarray->InstanceDivisor) +
                        glarray->_ElementSize);
            }
         } else {
            if (brw->vb.index_bounds_valid) {
               start = offset + min_index * glarray->StrideB;
               range = (glarray->StrideB * (max_index - min_index) +
                        glarray->_ElementSize);
            }
         }

         /* If we have a VB set to be uploaded for this buffer object
          * already, reuse that VB state so that we emit fewer
          * relocations.
          */
         unsigned k;
         for (k = 0; k < i; k++) {
            const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
            if (glarray->BufferObj == other->BufferObj &&
                glarray->StrideB == other->StrideB &&
                glarray->InstanceDivisor == other->InstanceDivisor &&
                (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
            {
               input->buffer = brw->vb.enabled[k]->buffer;
               input->offset = glarray->Ptr - other->Ptr;

               buffer_range_start[input->buffer] =
                  MIN2(buffer_range_start[input->buffer], start);
               buffer_range_end[input->buffer] =
                  MAX2(buffer_range_end[input->buffer], start + range);
               break;
            }
         }
         if (k == i) {
            struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

            /* Named buffer object: Just reference its contents directly. */
            buffer->offset = offset;
            buffer->stride = glarray->StrideB;
            buffer->step_rate = glarray->InstanceDivisor;
            buffer->size = glarray->BufferObj->Size - offset;

            enabled_buffer[j] = intel_buffer;
            buffer_range_start[j] = start;
            buffer_range_end[j] = start + range;

            input->buffer = j++;
            input->offset = 0;
         }
      } else {
         /* Queue the buffer object up to be uploaded in the next pass,
          * when we've decided if we're doing interleaved or not.
          */
         if (nr_uploads == 0) {
            interleaved = glarray->StrideB;
            ptr = glarray->Ptr;
         }
         else if (interleaved != glarray->StrideB ||
                  glarray->Ptr < ptr ||
                  (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize >
                  interleaved)
         {
            /* If our stride is different from the first attribute's stride,
             * or if the first attribute's stride didn't cover our element,
             * disable the interleaved upload optimization.  The second case
             * can most commonly occur in cases where there is a single vertex
             * and, for example, the data is stored on the application's
             * stack.
             *
             * NOTE: This will also disable the optimization in cases where
             * the data is in a different order than the array indices.
             * Something like:
             *
             *   float data[...];
             *   glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
             *   glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
             */
            interleaved = 0;
         }

         upload[nr_uploads++] = input;
      }
   }

   /* Now that we've set up all of the buffers, we walk through and reference
    * each of them.  We do this late so that we get the right size in each
    * buffer and don't reference too little data.
    */
   for (i = 0; i < j; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
      if (buffer->bo)
         continue;

      const uint32_t start = buffer_range_start[i];
      const uint32_t range = buffer_range_end[i] - buffer_range_start[i];

      buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start,
                                          range);
      drm_intel_bo_reference(buffer->bo);
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE, i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if (nr_uploads == brw->vb.nr_enabled) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }

   /* Handle any arrays to be uploaded. */
   if (nr_uploads > 1) {
      if (interleaved) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
         /* All uploads are interleaved, so upload the arrays together as
          * interleaved.  First, upload the contents and set up upload[0].
          */
         copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
                                 buffer, interleaved);
         buffer->offset -= delta * interleaved;
         buffer->size += delta * interleaved;

         for (i = 0; i < nr_uploads; i++) {
            /* Then, just point upload[i] at upload[0]'s buffer. */
            upload[i]->offset =
               ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
            upload[i]->buffer = j;
         }
         j++;

         nr_uploads = 0;
      }
   }

   /* Upload non-interleaved arrays */
   for (i = 0; i < nr_uploads; i++) {
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      if (upload[i]->glarray->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero.  Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
         copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
                                 buffer, upload[i]->glarray->_ElementSize);
      }
      buffer->offset -= delta * buffer->stride;
      buffer->size += delta * buffer->stride;
      buffer->step_rate = upload[i]->glarray->InstanceDivisor;
      upload[i]->buffer = j++;
      upload[i]->offset = 0;
   }

   brw->vb.nr_buffers = j;
}
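/* Minimal sketch (not driver code) of the [start, start + range) bound that
 * the loop above computes for a buffer-object-backed array, so that only the
 * data a draw can actually touch gets referenced.  Instanced arrays step once
 * per 'divisor' instances starting at the base instance; regular vertex
 * arrays cover [min_index, max_index].  The function and parameter names here
 * are hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>

static void
example_vb_bounds(uint32_t offset, uint32_t stride, uint32_t elem_size,
                  uint32_t divisor, uint32_t base_instance,
                  uint32_t num_instances, bool index_bounds_valid,
                  uint32_t min_index, uint32_t max_index,
                  uint32_t buffer_size,
                  uint32_t *start, uint32_t *range)
{
   /* Start with the worst case: reference the whole buffer object. */
   *start = 0;
   *range = buffer_size;

   if (divisor) {
      if (num_instances) {
         /* Stepped once per 'divisor' instances from base_instance on. */
         *start = offset + stride * base_instance;
         *range = stride * ((num_instances - 1) / divisor) + elem_size;
      }
   } else if (index_bounds_valid) {
      /* Per-vertex data: only [min_index, max_index] can be fetched. */
      *start = offset + min_index * stride;
      *range = stride * (max_index - min_index) + elem_size;
   }
}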