int
brw_bo_map_gtt(struct brw_context *brw, drm_intel_bo *bo, const char *bo_name)
{
   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
      return drm_intel_gem_bo_map_gtt(bo);

   float start_time = get_time();

   int ret = drm_intel_gem_bo_map_gtt(bo);

   perf_debug("GTT mapping a busy %s BO stalled and took %.03f ms.\n",
              bo_name, (get_time() - start_time) * 1000);

   return ret;
}
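All of the snippets below revolve around the same libdrm pattern. As a minimal, self-contained sketch (a hypothetical helper, assuming an already-open DRM fd and the public intel_bufmgr.h API, not code from any of these trees), it looks like this:

#include <assert.h>
#include <string.h>
#include "intel_bufmgr.h"

static void
write_through_gtt(int drm_fd, const void *data, size_t size)
{
   /* One bufmgr per DRM fd; 4096 is the batch-size hint. */
   drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
   drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "example", size, 4096);
   assert(bo);

   /* Map through the GTT aperture: write-combined and tiling-aware,
    * unlike a plain CPU map of a tiled BO. */
   if (drm_intel_gem_bo_map_gtt(bo) == 0) {
      memcpy(bo->virtual, data, size);
      drm_intel_gem_bo_unmap_gtt(bo);
   }

   drm_intel_bo_unreference(bo);
   drm_intel_bufmgr_destroy(bufmgr);
}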
/**
 * Interface for getting memory for uploading streamed data to the GPU
 *
 * In most cases, streamed data (for GPU state structures, for example) is
 * uploaded through brw_state_batch(), since that interface allows relocations
 * from the streamed space returned to other BOs.  However, that interface has
 * the restriction that the amount of space allocated has to be "small" (see
 * estimated_max_prim_size in brw_draw.c).
 *
 * This interface, on the other hand, is able to handle arbitrary sized
 * allocation requests, though it will batch small allocations into the same
 * BO for efficiency and reduced memory footprint.
 *
 * \note The returned pointer is valid only until intel_upload_finish(), which
 * will happen at batch flush or the next
 * intel_upload_space()/intel_upload_data().
 *
 * \param out_bo     Pointer to a BO, which must point to a valid BO or NULL
 *                   on entry, and will have a reference to the new BO
 *                   containing the state on return.
 *
 * \param out_offset Offset within the buffer object that the data will land.
 */
void *
intel_upload_space(struct brw_context *brw,
                   uint32_t size,
                   uint32_t alignment,
                   drm_intel_bo **out_bo,
                   uint32_t *out_offset)
{
   uint32_t offset;

   offset = ALIGN_NPOT(brw->upload.next_offset, alignment);
   if (brw->upload.bo && offset + size > brw->upload.bo->size) {
      intel_upload_finish(brw);
      offset = 0;
   }

   if (!brw->upload.bo) {
      brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "streamed data",
                                          MAX2(INTEL_UPLOAD_SIZE, size), 4096);
      if (brw->has_llc)
         drm_intel_bo_map(brw->upload.bo, true);
      else
         drm_intel_gem_bo_map_gtt(brw->upload.bo);
   }

   brw->upload.next_offset = offset + size;

   *out_offset = offset;
   if (*out_bo != brw->upload.bo) {
      drm_intel_bo_unreference(*out_bo);
      *out_bo = brw->upload.bo;
      drm_intel_bo_reference(brw->upload.bo);
   }

   return brw->upload.bo->virtual + offset;
}
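As a usage sketch (a hypothetical caller, not from the source tree), the contract above works out to: pass in NULL or a previously returned BO pointer, copy into the returned pointer immediately, and keep the (BO, offset) pair around for relocations; this is presumably what intel_upload_data() wraps.

/* Hypothetical caller: stream `size` bytes into the shared upload BO and
 * report where they landed so a relocation can point at them later.
 * *out_bo must be NULL or a valid BO on entry, per the note above. */
static void
stream_data_example(struct brw_context *brw, const void *data, uint32_t size,
                    drm_intel_bo **out_bo, uint32_t *out_offset)
{
   void *dst = intel_upload_space(brw, size, 64, out_bo, out_offset);

   /* The pointer is only valid until the next intel_upload_space() /
    * intel_upload_finish(), so write it right away. */
   memcpy(dst, data, size);
}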
/* Creates a new VS constant buffer reflecting the current VS program's
 * constants, if needed by the VS program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_vs_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
   /* BRW_NEW_VERTEX_PROGRAM */
   struct brw_vertex_program *vp =
      (struct brw_vertex_program *) brw->vertex_program;
   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
   int i;

   if (vp->program.IsNVProgram)
      _mesa_load_tracked_matrices(ctx);

   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);

   /* CACHE_NEW_VS_PROG */
   if (!brw->vs.prog_data->nr_pull_params) {
      if (brw->vs.const_bo) {
         drm_intel_bo_unreference(brw->vs.const_bo);
         brw->vs.const_bo = NULL;
         brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
         brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
      }
      return;
   }

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo_unreference(brw->vs.const_bo);
   brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
                                         brw->vs.prog_data->nr_pull_params * 4,
                                         64);

   drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
   for (i = 0; i < brw->vs.prog_data->nr_pull_params; i++) {
      memcpy(brw->vs.const_bo->virtual + i * 4,
             brw->vs.prog_data->pull_param[i],
             4);
   }

   if (0) {
      for (i = 0; i < params->NumParameters; i++) {
         float *row = (float *)brw->vs.const_bo->virtual + i * 4;
         printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
                i, row[0], row[1], row[2], row[3]);
      }
   }

   drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);

   const int surf = SURF_INDEX_VERT_CONST_BUFFER;
   intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
                                       params->NumParameters,
                                       &brw->bind.surf_offset[surf]);

   brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
static void *
i915_drm_buffer_map(struct i915_winsys *iws,
                    struct i915_winsys_buffer *buffer,
                    boolean write)
{
   struct i915_drm_buffer *buf = i915_drm_buffer(buffer);
   drm_intel_bo *bo = intel_bo(buffer);
   int ret = 0;

   assert(bo);

   if (buf->map_count)
      goto out;

   ret = drm_intel_gem_bo_map_gtt(bo);

   buf->ptr = bo->virtual;
   assert(ret == 0);

out:
   if (ret)
      return NULL;

   buf->map_count++;
   return buf->ptr;
}
/* XXX: Thread safety? */
void *
intel_region_map(struct intel_context *intel, struct intel_region *region,
                 GLbitfield mode)
{
   /* We have the region->map_refcount controlling mapping of the BO because
    * in software fallbacks we may end up mapping the same buffer multiple
    * times on Mesa's behalf, so we refcount our mappings to make sure that
    * the pointer stays valid until the end of the unmap chain.  However, we
    * must not emit any batchbuffers between the start of mapping and the end
    * of unmapping, or further use of the map will be incoherent with the GPU
    * rendering done by that batchbuffer.  Hence we assert in
    * intel_batchbuffer_flush() that that doesn't happen, which means that the
    * flush is only needed on first map of the buffer.
    */
   _DBG("%s %p\n", __FUNCTION__, region);

   if (!region->map_refcount) {
      intel_flush(&intel->ctx);

      if (region->tiling != I915_TILING_NONE)
         drm_intel_gem_bo_map_gtt(region->bo);
      else
         drm_intel_bo_map(region->bo, true);

      region->map = region->bo->virtual;
   }
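The unmap side is not part of this excerpt; a sketch of what it has to look like under the refcount contract described in the comment above (an assumption, not code from this file) is that only the last unmapper actually drops the CPU mapping:

/* Assumed counterpart: decrement the refcount and only unmap the BO when
 * the last mapper goes away, so earlier callers' pointers stay valid. */
void
intel_region_unmap(struct intel_context *intel, struct intel_region *region)
{
   _DBG("%s %p\n", __FUNCTION__, region);
   if (!--region->map_refcount) {
      drm_intel_bo_unmap(region->bo);
      region->map = NULL;
   }
}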
static int
map_bo(struct buffer *my_buf)
{
   if (drm_intel_gem_bo_map_gtt(my_buf->bo) != 0)
      return 0;

   my_buf->mmap = my_buf->bo->virtual;

   return 1;
}
void
brw_upload_vec4_pull_constants(struct brw_context *brw,
                               GLbitfield brw_new_constbuf,
                               const struct gl_program *prog,
                               struct brw_stage_state *stage_state,
                               const struct brw_vec4_prog_data *prog_data)
{
   int i;
   uint32_t surf_index = prog_data->base.binding_table.pull_constants_start;

   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   _mesa_load_state_parameters(&brw->ctx, prog->Parameters);

   if (!prog_data->nr_pull_params) {
      if (stage_state->const_bo) {
         drm_intel_bo_unreference(stage_state->const_bo);
         stage_state->const_bo = NULL;
         stage_state->surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= brw_new_constbuf;
      }
      return;
   }

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo_unreference(stage_state->const_bo);
   uint32_t size = prog_data->nr_pull_params * 4;
   stage_state->const_bo = drm_intel_bo_alloc(brw->bufmgr, "vec4_const_buffer",
                                              size, 64);

   drm_intel_gem_bo_map_gtt(stage_state->const_bo);

   for (i = 0; i < prog_data->nr_pull_params; i++) {
      memcpy(stage_state->const_bo->virtual + i * 4,
             prog_data->pull_param[i],
             4);
   }

   if (0) {
      for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
         float *row = (float *)stage_state->const_bo->virtual + i * 4;
         printf("const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
                i, row[0], row[1], row[2], row[3]);
      }
   }

   drm_intel_gem_bo_unmap_gtt(stage_state->const_bo);

   brw_create_constant_surface(brw, stage_state->const_bo, 0, size,
                               &stage_state->surf_offset[surf_index],
                               false);

   brw->state.dirty.brw |= brw_new_constbuf;
}
/**
 * Called via glMapBufferARB().
 */
static void *
intel_bufferobj_map(struct gl_context * ctx,
                    GLenum target,
                    GLenum access,
                    struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   GLboolean read_only = (access == GL_READ_ONLY_ARB);
   GLboolean write_only = (access == GL_WRITE_ONLY_ARB);

   assert(intel_obj);

   if (intel_obj->sys_buffer) {
      if (!read_only && intel_obj->source) {
         release_buffer(intel_obj);
      }

      if (!intel_obj->buffer || intel_obj->source) {
         obj->Pointer = intel_obj->sys_buffer;
         obj->Length = obj->Size;
         obj->Offset = 0;
         return obj->Pointer;
      }

      free(intel_obj->sys_buffer);
      intel_obj->sys_buffer = NULL;
   }

   /* Flush any existing batchbuffer that might reference this data. */
   if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer))
      intel_flush(ctx);

   if (intel_obj->region)
      intel_bufferobj_cow(intel, intel_obj);

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   if (write_only) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_obj->mapped_gtt = GL_TRUE;
   } else {
      drm_intel_bo_map(intel_obj->buffer, !read_only);
      intel_obj->mapped_gtt = GL_FALSE;
   }

   obj->Pointer = intel_obj->buffer->virtual;
   obj->Length = obj->Size;
   obj->Offset = 0;

   return obj->Pointer;
}
void *
intel_bo_map_gtt(struct intel_bo *bo)
{
   int err;

   err = drm_intel_gem_bo_map_gtt(gem_bo(bo));
   if (err) {
      debug_error("failed to map bo");
      return NULL;
   }

   return gem_bo(bo)->virtual;
}
void intelFlushBatch(void)
{
   dri_bo *bo;

   i965_end_batch();

   drm_intel_bo_exec(xvmc_driver->batch.buf,
                     xvmc_driver->batch.ptr - xvmc_driver->batch.init_ptr,
                     0, 0, 0);

   bo = drm_intel_bo_alloc(xvmc_driver->bufmgr,
                           "batch buffer", BATCH_SIZE, 0x1000);
   if (bo != NULL && drm_intel_gem_bo_map_gtt(bo) == 0) {
      drm_intel_bo_unreference(xvmc_driver->batch.buf);
      xvmc_driver->batch.buf = bo;
   } else {
      if (bo != NULL)
         drm_intel_bo_unreference(bo);
      drm_intel_gem_bo_map_gtt(xvmc_driver->batch.buf);
   }

   reset_batch();
}
Bool intelInitBatchBuffer(void)
{
   if ((xvmc_driver->batch.buf =
        drm_intel_bo_alloc(xvmc_driver->bufmgr,
                           "batch buffer", BATCH_SIZE, 0x1000)) == NULL) {
      fprintf(stderr, "unable to alloc batch buffer\n");
      return False;
   }

   if (drm_intel_gem_bo_map_gtt(xvmc_driver->batch.buf)) {
      drm_intel_bo_unreference(xvmc_driver->batch.buf);
      return False;
   }

   reset_batch();

   return True;
}
/* XXX: Thread safety? */
GLubyte *
intel_region_map(struct intel_context *intel, struct intel_region *region)
{
   intelFlush(&intel->ctx);

   _DBG("%s %p\n", __FUNCTION__, region);
   if (!region->map_refcount++) {
      if (region->pbo)
         intel_region_cow(intel, region);

      if (region->tiling != I915_TILING_NONE &&
          intel->intelScreen->kernel_exec_fencing)
         drm_intel_gem_bo_map_gtt(region->buffer);
      else
         dri_bo_map(region->buffer, GL_TRUE);

      region->map = region->buffer->virtual;
   }

   return region->map;
}
void *
intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt)
{
   drm_intel_bo *bo = mt->region->bo;

   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
      if (drm_intel_bo_busy(bo)) {
         perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
      }
   }

   intel_flush(&intel->ctx);

   if (mt->region->tiling != I915_TILING_NONE)
      drm_intel_gem_bo_map_gtt(bo);
   else
      drm_intel_bo_map(bo, true);

   return bo->virtual;
}
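The matching unmap path is not shown here; a plausible sketch (an assumption about the counterpart, not taken from this excerpt) is a single call that covers both mapping branches, since drm_intel_bo_unmap() releases whichever kind of mapping is current:

/* Assumed counterpart to intel_miptree_map_raw(): one unmap call
 * releases either a CPU or a GTT mapping of the region's BO. */
void
intel_miptree_unmap_raw(struct intel_context *intel,
                        struct intel_mipmap_tree *mt)
{
   drm_intel_bo_unmap(mt->region->bo);
}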
void
intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb)
{
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);

   if (irb == NULL || irb->region == NULL)
      return;

   drm_intel_gem_bo_map_gtt(irb->region->buffer);

   rb->Data = irb->region->buffer->virtual;
   rb->RowStride = irb->region->pitch;

   /* Flip orientation if it's the window system buffer */
   if (!rb->Name) {
      rb->Data += rb->RowStride * (irb->region->height - 1) * irb->region->cpp;
      rb->RowStride = -rb->RowStride;
   }

   intel_set_span_functions(intel, rb);
}
static void init_buffer(struct scratch_buf *buf, unsigned size)
{
   buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
   assert(buf->bo);
   buf->tiling = I915_TILING_NONE;
   buf->stride = 4096;

   sanitize_stride(buf);

   if (options.no_hw)
      buf->data = malloc(size);
   else {
      if (options.use_cpu_maps)
         drm_intel_bo_map(buf->bo, 1);
      else
         drm_intel_gem_bo_map_gtt(buf->bo);
      buf->data = buf->bo->virtual;
   }

   buf->num_tiles = options.tiles_per_buf;
}
static void
copy_array_to_vbo_array(struct brw_context *brw,
                        struct brw_vertex_element *element,
                        GLuint dst_stride)
{
   struct intel_context *intel = &brw->intel;
   GLuint size = element->count * dst_stride;

   get_space(brw, size, &element->bo, &element->offset);

   if (element->glarray->StrideB == 0) {
      assert(element->count == 1);
      element->stride = 0;
   } else {
      element->stride = dst_stride;
   }

   if (dst_stride == element->glarray->StrideB) {
      if (intel->intelScreen->kernel_exec_fencing) {
         drm_intel_gem_bo_map_gtt(element->bo);
         memcpy((char *)element->bo->virtual + element->offset,
                element->glarray->Ptr, size);
         drm_intel_gem_bo_unmap_gtt(element->bo);
      } else {
/**
 * Called via glMapBufferRange().
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(GLcontext * ctx,
                          GLenum target, GLintptr offset, GLsizeiptr length,
                          GLbitfield access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

   if (intel_obj->sys_buffer) {
      obj->Pointer = intel_obj->sys_buffer + offset;
      return obj->Pointer;
   }

   if (intel_obj->region)
      intel_bufferobj_cow(intel, intel_obj);

   /* If the mapping is synchronized with other GL operations, flush
    * the batchbuffer so that GEM knows about the buffer access for later
    * syncing.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
      intelFlush(ctx);

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the user doesn't care about existing buffer contents and mapping
    * would cause us to block, then throw out the old buffer.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      drm_intel_bo_unreference(intel_obj->buffer);
      intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
                                       intel_obj->Base.Size, 64);
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
         intel_obj->range_map_buffer = _mesa_malloc(length);
         obj->Pointer = intel_obj->range_map_buffer;
      } else {
         intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "range map",
                                                      length, 64);
         if (!(access & GL_MAP_READ_BIT) &&
             intel->intelScreen->kernel_exec_fencing) {
            drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
            intel_obj->mapped_gtt = GL_TRUE;
         } else {
            drm_intel_bo_map(intel_obj->range_map_bo,
                             (access & GL_MAP_WRITE_BIT) != 0);
            intel_obj->mapped_gtt = GL_FALSE;
         }
         obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

   if (!(access & GL_MAP_READ_BIT) &&
       intel->intelScreen->kernel_exec_fencing) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
      intel_obj->mapped_gtt = GL_TRUE;
   } else {
      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
      intel_obj->mapped_gtt = GL_FALSE;
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}
/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism, but maybe would benefit from a comparison against
 * the current uploaded set of constants.
 */
static void prepare_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   GLfloat *buf;
   GLuint i;

   if (sz == 0) {
      brw->curbe.last_bufsz = 0;
      return;
   }

   buf = brw->curbe.next_buf;

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      GLuint offset = brw->curbe.wm_start * 16;

      /* copy float constants */
      for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
         buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
                                         *brw->wm.prog_data->param[i]);
      }
   }

   /* The clipplanes are actually delivered to both CLIP and VS units.
    * VS uses them to calculate the outcode bitmasks.
    */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
         buf[offset + i * 4 + 0] = fixed_plane[i][0];
         buf[offset + i * 4 + 1] = fixed_plane[i][1];
         buf[offset + i * 4 + 2] = fixed_plane[i][2];
         buf[offset + i * 4 + 3] = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      assert(MAX_CLIP_PLANES == 6);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
         if (ctx->Transform.ClipPlanesEnabled & (1 << j)) {
            buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
            buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
            buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
            buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
            i++;
         }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      GLuint offset = brw->curbe.vs_start * 16;
      GLuint nr = brw->vs.prog_data->nr_params / 4;

      /* Load the subset of push constants that will get used when
       * we also have a pull constant buffer.
       */
      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
         if (brw->vs.constant_map[i] != -1) {
            assert(brw->vs.constant_map[i] <= nr);
            memcpy(buf + offset + brw->vs.constant_map[i] * 4,
                   vp->program.Base.Parameters->ParameterValues[i],
                   4 * sizeof(float));
         }
      }
   }

   if (0) {
      for (i = 0; i < sz * 16; i += 4)
         printf("curbe %d.%d: %f %f %f %f\n", i / 8, i & 4,
                buf[i + 0], buf[i + 1], buf[i + 2], buf[i + 3]);

      printf("last_buf %p buf %p sz %d/%d cmp %d\n",
             brw->curbe.last_buf, buf,
             bufsz, brw->curbe.last_bufsz,
             brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.curbe_bo != NULL &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* constants have not changed */
   } else {
      /* Update the record of what our last set of constants was.  We
       * don't just flip the pointers because we don't fill in the
       * data in the padding between the entries.
       */
      memcpy(brw->curbe.last_buf, buf, bufsz);
      brw->curbe.last_bufsz = bufsz;

      if (brw->curbe.curbe_bo != NULL &&
          brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) {
         drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
         drm_intel_bo_unreference(brw->curbe.curbe_bo);
         brw->curbe.curbe_bo = NULL;
      }

      if (brw->curbe.curbe_bo == NULL) {
         /* Allocate a single page for CURBE entries for this batchbuffer.
          * They're generally around 64b.
          */
         brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->intel.bufmgr, "CURBE",
                                                  4096, 1 << 6);
         brw->curbe.curbe_next_offset = 0;
         drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
         assert(bufsz < 4096);
      }

      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
      brw->curbe.curbe_next_offset += bufsz;
      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);

      /* Copy data to the buffer:
       */
      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
             buf, bufsz);
   }

   brw_add_validated_bo(brw, brw->curbe.curbe_bo);

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */
}
/* Ensure the supplied VA surface has valid storage for decoding the current picture */
VAStatus
avc_ensure_surface_bo(
    VADriverContextP                    ctx,
    struct decode_state                *decode_state,
    struct object_surface              *obj_surface,
    const VAPictureParameterBufferH264 *pic_param
)
{
    VAStatus va_status;
    uint32_t hw_fourcc, fourcc, subsample, chroma_format;

    /* Validate chroma format */
    switch (pic_param->seq_fields.bits.chroma_format_idc) {
    case 0: // Grayscale
        fourcc = VA_FOURCC_Y800;
        subsample = SUBSAMPLE_YUV400;
        chroma_format = VA_RT_FORMAT_YUV400;
        break;
    case 1: // YUV 4:2:0
        fourcc = VA_FOURCC_NV12;
        subsample = SUBSAMPLE_YUV420;
        chroma_format = VA_RT_FORMAT_YUV420;
        break;
    default:
        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
    }

    /* Determine the HW surface format, bound to VA config needs */
    if ((decode_state->base.chroma_formats & chroma_format) == chroma_format)
        hw_fourcc = fourcc;
    else {
        hw_fourcc = 0;
        switch (fourcc) {
        case VA_FOURCC_Y800: // Implement with an NV12 surface
            if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) {
                hw_fourcc = VA_FOURCC_NV12;
                subsample = SUBSAMPLE_YUV420;
            }
            break;
        }
    }
    if (!hw_fourcc)
        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;

    /* (Re-)allocate the underlying surface buffer store, if necessary */
    if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
        struct i965_driver_data * const i965 = i965_driver_data(ctx);

        i965_destroy_surface_storage(obj_surface);
        va_status = i965_check_alloc_surface_bo(ctx, obj_surface,
                                                i965->codec_info->has_tiled_surface,
                                                hw_fourcc, subsample);
        if (va_status != VA_STATUS_SUCCESS)
            return va_status;
    }

    /* Fake chroma components if grayscale is implemented on top of NV12 */
    if (fourcc == VA_FOURCC_Y800 && hw_fourcc == VA_FOURCC_NV12) {
        const uint32_t uv_offset = obj_surface->width * obj_surface->height;
        const uint32_t uv_size   = obj_surface->width * obj_surface->height / 2;

        drm_intel_gem_bo_map_gtt(obj_surface->bo);
        memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
        drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }
    return VA_STATUS_SUCCESS;
}
/* Copy BitBlt */
GLboolean
intelEmitCopyBlit(struct intel_context *intel,
                  GLuint cpp,
                  GLshort src_pitch,
                  dri_bo *src_buffer,
                  GLuint src_offset,
                  uint32_t src_tiling,
                  GLshort dst_pitch,
                  dri_bo *dst_buffer,
                  GLuint dst_offset,
                  uint32_t dst_tiling,
                  GLshort src_x, GLshort src_y,
                  GLshort dst_x, GLshort dst_y,
                  GLshort w, GLshort h,
                  GLenum logic_op)
{
   GLuint CMD, BR13, pass = 0;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   dri_bo *aper_array[3];
   BATCH_LOCALS;

   /* Blits are in a different ringbuffer so we don't use them. */
   if (intel->gen >= 6)
      return GL_FALSE;

   if (dst_tiling != I915_TILING_NONE) {
      if (dst_offset & 4095)
         return GL_FALSE;
      if (dst_tiling == I915_TILING_Y)
         return GL_FALSE;
   }
   if (src_tiling != I915_TILING_NONE) {
      if (src_offset & 4095)
         return GL_FALSE;
      if (src_tiling == I915_TILING_Y)
         return GL_FALSE;
   }

   /* do space check before going any further */
   do {
      aper_array[0] = intel->batch->buf;
      aper_array[1] = dst_buffer;
      aper_array[2] = src_buffer;

      if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
         intel_batchbuffer_flush(intel->batch);
         pass++;
      } else
         break;
   } while (pass < 2);

   if (pass >= 2) {
      drm_intel_gem_bo_map_gtt(dst_buffer);
      drm_intel_gem_bo_map_gtt(src_buffer);
      _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset,
                      cpp,
                      dst_pitch,
                      dst_x, dst_y,
                      w, h,
                      (GLubyte *)src_buffer->virtual + src_offset,
                      src_pitch,
                      src_x, src_y);
      drm_intel_gem_bo_unmap_gtt(src_buffer);
      drm_intel_gem_bo_unmap_gtt(dst_buffer);

      return GL_TRUE;
   }
static void
upload_vs_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
   drm_intel_bo *constant_bo;
   int i;

   if (vp->use_const_buffer || nr_params == 0) {
      /* Disable the push constant buffers. */
      BEGIN_BATCH(5);
      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      if (brw->vertex_program->IsNVProgram)
         _mesa_load_tracked_matrices(ctx);

      /* Updates the ParameterValues[i] pointers for all parameters of the
       * basic type of PROGRAM_STATE_VAR.
       */
      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);

      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
                                       nr_params * 4 * sizeof(float),
                                       4096);
      drm_intel_gem_bo_map_gtt(constant_bo);
      for (i = 0; i < nr_params; i++) {
         memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
                vp->program.Base.Parameters->ParameterValues[i],
                4 * sizeof(float));
      }
      drm_intel_gem_bo_unmap_gtt(constant_bo);

      BEGIN_BATCH(5);
      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
                GEN6_CONSTANT_BUFFER_0_ENABLE |
                (5 - 2));
      OUT_RELOC(constant_bo,
                I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
                ALIGN(nr_params, 2) / 2 - 1);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      drm_intel_bo_unreference(constant_bo);
   }

   intel_batchbuffer_emit_mi_flush(intel->batch);

   BEGIN_BATCH(6);
   OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
             (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
   OUT_BATCH(0); /* scratch space base offset */
   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
             (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
             (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
   OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
             GEN6_VS_STATISTICS_ENABLE);
   ADVANCE_BATCH();

   intel_batchbuffer_emit_mi_flush(intel->batch);
}
/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism, but maybe would benefit from a comparison against
 * the current uploaded set of constants.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   const struct brw_vertex_program *vp =
      brw_vertex_program_const(brw->vertex_program);
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   GLfloat *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   if (sz == 0) {
      brw->curbe.last_bufsz = 0;
      goto emit;
   }

   buf = brw->curbe.next_buf;

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      GLuint offset = brw->curbe.wm_start * 16;

      /* copy float constants */
      for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
         buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
                                         brw->wm.prog_data->param[i]);
      }
   }

   /* When using the old VS backend, the clipplanes are actually delivered to
    * both CLIP and VS units.  VS uses them to calculate the outcode bitmasks.
    *
    * When using the new VS backend, it is responsible for setting up its own
    * clipplane constants if it needs them.  This results in a slight waste of
    * curbe space, but the advantage is that the new VS backend can use its
    * general-purpose uniform layout code to store the clipplanes.
    */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
         buf[offset + i * 4 + 0] = fixed_plane[i][0];
         buf[offset + i * 4 + 1] = fixed_plane[i][1];
         buf[offset + i * 4 + 2] = fixed_plane[i][2];
         buf[offset + i * 4 + 3] = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      clip_planes = brw_select_clip_planes(ctx);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
         if (ctx->Transform.ClipPlanesEnabled & (1 << j)) {
            buf[offset + i * 4 + 0] = clip_planes[j][0];
            buf[offset + i * 4 + 1] = clip_planes[j][1];
            buf[offset + i * 4 + 2] = clip_planes[j][2];
            buf[offset + i * 4 + 3] = clip_planes[j][3];
            i++;
         }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      GLuint offset = brw->curbe.vs_start * 16;
      GLuint nr = brw->vs.prog_data->nr_params / 4;

      if (brw->vs.prog_data->uses_new_param_layout) {
         for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
            buf[offset + i] = *brw->vs.prog_data->param[i];
         }
      } else {
         /* Load the subset of push constants that will get used when
          * we also have a pull constant buffer.
          */
         for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
            if (brw->vs.constant_map[i] != -1) {
               assert(brw->vs.constant_map[i] <= nr);
               memcpy(buf + offset + brw->vs.constant_map[i] * 4,
                      vp->program.Base.Parameters->ParameterValues[i],
                      4 * sizeof(float));
            }
         }
      }
   }

   if (0) {
      for (i = 0; i < sz * 16; i += 4)
         printf("curbe %d.%d: %f %f %f %f\n", i / 8, i & 4,
                buf[i + 0], buf[i + 1], buf[i + 2], buf[i + 3]);

      printf("last_buf %p buf %p sz %d/%d cmp %d\n",
             brw->curbe.last_buf, buf,
             bufsz, brw->curbe.last_bufsz,
             brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.curbe_bo != NULL &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* constants have not changed */
   } else {
      /* Update the record of what our last set of constants was.  We
       * don't just flip the pointers because we don't fill in the
       * data in the padding between the entries.
       */
      memcpy(brw->curbe.last_buf, buf, bufsz);
      brw->curbe.last_bufsz = bufsz;

      if (brw->curbe.curbe_bo != NULL &&
          brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) {
         drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
         drm_intel_bo_unreference(brw->curbe.curbe_bo);
         brw->curbe.curbe_bo = NULL;
      }

      if (brw->curbe.curbe_bo == NULL) {
         /* Allocate a single page for CURBE entries for this batchbuffer.
          * They're generally around 64b.
          */
         brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->intel.bufmgr, "CURBE",
                                                  4096, 1 << 6);
         brw->curbe.curbe_next_offset = 0;
         drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
         assert(bufsz < 4096);
      }

      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
      brw->curbe.curbe_next_offset += bufsz;
      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);

      /* Copy data to the buffer:
       */
      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
             buf, bufsz);
   }

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */
emit:
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
                I915_GEM_DOMAIN_INSTRUCTION, 0,
                (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
/**
 * Called via glMapBufferRange and glMapBuffer
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
                          GLintptr offset, GLsizeiptr length,
                          GLbitfield access, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Offset = offset;
   obj->Length = length;
   obj->AccessFlags = access;

   if (intel_obj->sys_buffer) {
      const bool read_only =
         (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;

      if (!read_only && intel_obj->source)
         release_buffer(intel_obj);

      if (!intel_obj->buffer || intel_obj->source) {
         obj->Pointer = intel_obj->sys_buffer + offset;
         return obj->Pointer;
      }

      free(intel_obj->sys_buffer);
      intel_obj->sys_buffer = NULL;
   }

   if (intel_obj->buffer == NULL) {
      obj->Pointer = NULL;
      return NULL;
   }

   /* If the access is synchronized (like a normal buffer mapping), then get
    * things flushed out so the later mapping syncs appropriately through GEM.
    * If the user doesn't care about existing buffer contents and mapping would
    * cause us to block, then throw out the old buffer.
    *
    * If they set INVALIDATE_BUFFER, we can pitch the current contents to
    * achieve the required synchronization.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
      if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
         if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
            drm_intel_bo_unreference(intel_obj->buffer);
            intel_bufferobj_alloc_buffer(intel, intel_obj);
         } else {
            perf_debug("Stalling on the GPU for mapping a busy buffer "
                       "object\n");
            intel_flush(ctx);
         }
      } else if (drm_intel_bo_busy(intel_obj->buffer) &&
                 (access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(intel, intel_obj);
      }
   }

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
         intel_obj->range_map_buffer = malloc(length);
         obj->Pointer = intel_obj->range_map_buffer;
      } else {
         intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "range map",
                                                      length, 64);
         if (!(access & GL_MAP_READ_BIT)) {
            drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
         } else {
            drm_intel_bo_map(intel_obj->range_map_bo,
                             (access & GL_MAP_WRITE_BIT) != 0);
         }
         obj->Pointer = intel_obj->range_map_bo->virtual;
      }
      return obj->Pointer;
   }

   if (access & GL_MAP_UNSYNCHRONIZED_BIT)
      drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
   else if (!(access & GL_MAP_READ_BIT)) {
      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
   } else {
      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
   }

   obj->Pointer = intel_obj->buffer->virtual + offset;
   return obj->Pointer;
}
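For context, a hypothetical application-side sketch of the access pattern the comment above is optimizing for: an unsynchronized, range-invalidating map lets the driver skip the flush-and-stall path entirely. This assumes a GL 3.0+ (or ARB_map_buffer_range) context with entry points resolved by an extension loader; it is illustrative client code, not part of the driver.

/* Hypothetical GL client code: append vertex data without stalling on
 * rendering that still references the buffer. */
static void
append_vertices(GLuint vbo, GLintptr offset, GLsizeiptr size, const void *data)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);

   void *ptr = glMapBufferRange(GL_ARRAY_BUFFER, offset, size,
                                GL_MAP_WRITE_BIT |
                                GL_MAP_INVALIDATE_RANGE_BIT |
                                GL_MAP_UNSYNCHRONIZED_BIT);
   if (ptr) {
      memcpy(ptr, data, size);
      glUnmapBuffer(GL_ARRAY_BUFFER);
   }
}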