/* * Set the next domain for the specified object. This * may not actually perform the necessary flushing/invaliding though, * as that may want to be batched with other set_domain operations * * This is (we hope) the only really tricky part of gem. The goal * is fairly simple -- track which caches hold bits of the object * and make sure they remain coherent. A few concrete examples may * help to explain how it works. For shorthand, we use the notation * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the * a pair of read and write domain masks. * * Case 1: the batch buffer * * 1. Allocated * 2. Written by CPU * 3. Mapped to GTT * 4. Read by GPU * 5. Unmapped from GTT * 6. Freed * * Let's take these a step at a time * * 1. Allocated * Pages allocated from the kernel may still have * cache contents, so we set them to (CPU, CPU) always. * 2. Written by CPU (using pwrite) * The pwrite function calls set_domain (CPU, CPU) and * this function does nothing (as nothing changes) * 3. Mapped by GTT * This function asserts that the object is not * currently in any GPU-based read or write domains * 4. Read by GPU * i915_gem_execbuffer calls set_domain (COMMAND, 0). * As write_domain is zero, this function adds in the * current read domains (CPU+COMMAND, 0). * flush_domains is set to CPU. * invalidate_domains is set to COMMAND * clflush is run to get data out of the CPU caches * then i915_dev_set_domain calls i915_gem_flush to * emit an MI_FLUSH and drm_agp_chipset_flush * 5. Unmapped from GTT * i915_gem_object_unbind calls set_domain (CPU, CPU) * flush_domains and invalidate_domains end up both zero * so no flushing/invalidating happens * 6. Freed * yay, done * * Case 2: The shared render buffer * * 1. Allocated * 2. Mapped to GTT * 3. Read/written by GPU * 4. set_domain to (CPU,CPU) * 5. Read/written by CPU * 6. Read/written by GPU * * 1. Allocated * Same as last example, (CPU, CPU) * 2. Mapped to GTT * Nothing changes (assertions find that it is not in the GPU) * 3. Read/written by GPU * execbuffer calls set_domain (RENDER, RENDER) * flush_domains gets CPU * invalidate_domains gets GPU * clflush (obj) * MI_FLUSH and drm_agp_chipset_flush * 4. set_domain (CPU, CPU) * flush_domains gets GPU * invalidate_domains gets CPU * wait_rendering (obj) to make sure all drawing is complete. * This will include an MI_FLUSH to get the data from GPU * to memory * clflush (obj) to invalidate the CPU cache * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) * 5. Read/written by CPU * cache lines are loaded and dirtied * 6. Read written by GPU * Same as last GPU access * * Case 3: The constant buffer * * 1. Allocated * 2. Written by CPU * 3. Read by GPU * 4. Updated (written) by CPU again * 5. Read by GPU * * 1. Allocated * (CPU, CPU) * 2. Written by CPU * (CPU, CPU) * 3. Read by GPU * (CPU+RENDER, 0) * flush_domains = CPU * invalidate_domains = RENDER * clflush (obj) * MI_FLUSH * drm_agp_chipset_flush * 4. Updated (written) by CPU again * (CPU, CPU) * flush_domains = 0 (no previous write domain) * invalidate_domains = 0 (no new read domains) * 5. Read by GPU * (CPU+RENDER, 0) * flush_domains = CPU * invalidate_domains = RENDER * clflush (obj) * MI_FLUSH * drm_agp_chipset_flush */ static void i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring, struct change_domains *cd) { uint32_t invalidate_domains = 0, flush_domains = 0; /* * If the object isn't moving to a new write domain, * let the object stay in multiple read domains */ if (obj->base.pending_write_domain == 0) obj->base.pending_read_domains |= obj->base.read_domains; /* * Flush the current write domain if * the new read domains don't match. Invalidate * any read domains which differ from the old * write domain */ if (obj->base.write_domain && (((obj->base.write_domain != obj->base.pending_read_domains || obj->ring != ring)) || (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) { flush_domains |= obj->base.write_domain; invalidate_domains |= obj->base.pending_read_domains & ~obj->base.write_domain; } /* * Invalidate any read caches which may have * stale data. That is, any new read domains. */ invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains; if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj); /* blow away mappings if mapped through GTT */ if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT) i915_gem_release_mmap(obj); /* The actual obj->write_domain will be updated with * pending_write_domain after we emit the accumulated flush for all * of our domain changes in execbuffers (which clears objects' * write_domains). So if we have a current write domain that we * aren't changing, set pending_write_domain to that. */ if (flush_domains == 0 && obj->base.pending_write_domain == 0) obj->base.pending_write_domain = obj->base.write_domain; cd->invalidate_domains |= invalidate_domains; cd->flush_domains |= flush_domains; if (flush_domains & I915_GEM_GPU_DOMAINS) cd->flush_rings |= obj->ring->id; if (invalidate_domains & I915_GEM_GPU_DOMAINS) cd->flush_rings |= ring->id; }
/** * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. */ int i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_i915_gem_set_tiling *args = data; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_gem_object *obj; struct drm_i915_gem_object *obj_priv; int ret = 0; obj = drm_gem_object_lookup(dev, file_priv, args->handle); if (obj == NULL) return -EINVAL; obj_priv = obj->driver_private; if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode)) { mutex_lock(&dev->struct_mutex); drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return -EINVAL; } if (args->tiling_mode == I915_TILING_NONE) { args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, * and we use the pread/pwrite bit17 paths to swizzle for it. * If there was a user that was relying on the swizzle * information for drm_intel_bo_map()ed reads/writes this would * break it, but we don't have any of those. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; /* If we can't handle the swizzling, make it untiled. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } } mutex_lock(&dev->struct_mutex); if (args->tiling_mode != obj_priv->tiling_mode || args->stride != obj_priv->stride) { /* We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but * need to ensure that any fence register is cleared. */ if (!i915_gem_object_fence_offset_ok(obj, args->tiling_mode)) ret = i915_gem_object_unbind(obj); else ret = i915_gem_object_put_fence_reg(obj); if (ret != 0) { WARN(ret != -ERESTARTSYS, "failed to reset object for tiling switch"); args->tiling_mode = obj_priv->tiling_mode; args->stride = obj_priv->stride; goto err; } /* If we've changed tiling, GTT-mappings of the object * need to re-fault to ensure that the correct fence register * setup is in place. */ i915_gem_release_mmap(obj); obj_priv->tiling_mode = args->tiling_mode; obj_priv->stride = args->stride; } err: drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return ret; }
/** * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. */ int i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_set_tiling *args = data; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; int ret; ret = i915_gem_check_is_wedged(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (obj == NULL) return -ENOENT; if (!i915_tiling_ok(dev, args->stride, obj->base.size, args->tiling_mode)) { drm_gem_object_unreference_unlocked(&obj->base); return -EINVAL; } if (obj->pin_count) { drm_gem_object_unreference_unlocked(&obj->base); return -EBUSY; } if (args->tiling_mode == I915_TILING_NONE) { args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, * and we use the pread/pwrite bit17 paths to swizzle for it. * If there was a user that was relying on the swizzle * information for drm_intel_bo_map()ed reads/writes this would * break it, but we don't have any of those. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; /* If we can't handle the swizzling, make it untiled. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } } mutex_lock(&dev->struct_mutex); if (args->tiling_mode != obj->tiling_mode || args->stride != obj->stride) { /* We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but * need to ensure that any fence register is cleared. */ i915_gem_release_mmap(obj); obj->map_and_fenceable = obj->gtt_space == NULL || (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && i915_gem_object_fence_ok(obj, args->tiling_mode)); obj->tiling_changed = true; obj->tiling_mode = args->tiling_mode; obj->stride = args->stride; } drm_gem_object_unreference(&obj->base); mutex_unlock(&dev->struct_mutex); return 0; }
/** * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. */ int i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_set_tiling *args = data; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; int ret = 0; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) return -ENOENT; if (!i915_tiling_ok(dev, args->stride, obj->base.size, args->tiling_mode)) { drm_gem_object_unreference_unlocked(&obj->base); return -EINVAL; } if (i915_gem_obj_is_pinned(obj) || obj->framebuffer_references) { drm_gem_object_unreference_unlocked(&obj->base); return -EBUSY; } if (args->tiling_mode == I915_TILING_NONE) { args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, * and we use the pread/pwrite bit17 paths to swizzle for it. * If there was a user that was relying on the swizzle * information for drm_intel_bo_map()ed reads/writes this would * break it, but we don't have any of those. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; /* If we can't handle the swizzling, make it untiled. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } } mutex_lock(&dev->struct_mutex); if (args->tiling_mode != obj->tiling_mode || args->stride != obj->stride) { /* We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but * need to ensure that any fence register is updated before * the next fenced (either through the GTT or by the BLT unit * on older GPUs) access. * * After updating the tiling parameters, we then flag whether * we need to update an associated fence register. Note this * has to also include the unfenced register the GPU uses * whilst executing a fenced command for an untiled object. */ obj->map_and_fenceable = !i915_gem_obj_ggtt_bound(obj) || (i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end && i915_gem_object_fence_ok(obj, args->tiling_mode)); /* Rebind if we need a change of alignment */ if (!obj->map_and_fenceable) { u32 unfenced_align = i915_gem_get_gtt_alignment(dev, obj->base.size, args->tiling_mode, false); if (i915_gem_obj_ggtt_offset(obj) & (unfenced_align - 1)) ret = i915_gem_object_ggtt_unbind(obj); } if (ret == 0) { obj->fence_dirty = obj->last_fenced_seqno || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; obj->stride = args->stride; /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); } } /* we have to maintain this existing ABI... */ args->stride = obj->stride; args->tiling_mode = obj->tiling_mode; /* Try to preallocate memory required to save swizzling on put-pages */ if (i915_gem_object_needs_bit17_swizzle(obj)) { if (obj->bit_17 == NULL) { obj->bit_17 = kcalloc(BITS_TO_LONGS(obj->base.size >> PAGE_SHIFT), sizeof(long), GFP_KERNEL); } } else {
/** * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. */ int i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_set_tiling *args = data; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; int ret = 0; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) return -ENOENT; if (!i915_tiling_ok(dev, args->stride, obj->base.size, args->tiling_mode)) { drm_gem_object_unreference_unlocked(&obj->base); return -EINVAL; } if (obj->pin_count) { drm_gem_object_unreference_unlocked(&obj->base); return -EBUSY; } if (args->tiling_mode == I915_TILING_NONE) { args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, * and we use the pread/pwrite bit17 paths to swizzle for it. * If there was a user that was relying on the swizzle * information for drm_intel_bo_map()ed reads/writes this would * break it, but we don't have any of those. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; /* If we can't handle the swizzling, make it untiled. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } } DRM_LOCK(dev); if (args->tiling_mode != obj->tiling_mode || args->stride != obj->stride) { /* We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but * need to ensure that any fence register is cleared. * * After updating the tiling parameters, we then flag whether * we need to update an associated fence register. Note this * has to also include the unfenced register the GPU uses * whilst executing a fenced command for an untiled object. */ i915_gem_release_mmap(obj); obj->map_and_fenceable = obj->gtt_space == NULL || (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && i915_gem_object_fence_ok(obj, args->tiling_mode)); /* Rebind if we need a change of alignment */ if (!obj->map_and_fenceable) { u32 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(dev, obj->base.size, args->tiling_mode); if (obj->gtt_offset & (unfenced_alignment - 1)) ret = i915_gem_object_unbind(obj); } if (ret == 0) { obj->fence_dirty = obj->fenced_gpu_access || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; obj->stride = args->stride; } } /* we have to maintain this existing ABI... */ args->stride = obj->stride; args->tiling_mode = obj->tiling_mode; drm_gem_object_unreference(&obj->base); DRM_UNLOCK(dev); return ret; }