int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
			 struct dma_fence *fence, uint64_t *handler)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	uint64_t seq = cring->sequence;
	unsigned idx = 0;
	struct dma_fence *other = NULL;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = cring->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	cring->fences[idx] = fence;
	cring->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);

	if (handler)
		*handler = seq;

	return 0;
}
static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_job *job;
	int r;

	if (!sched_job) {
		DRM_ERROR("job is null\n");
		return NULL;
	}
	job = to_amdgpu_job(sched_job);

	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));

	trace_amdgpu_sched_run_job(job);

	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
	if (r)
		DRM_ERROR("Error scheduling IBs (%d)\n", r);

	/* if gpu reset, hw fence will be replaced here */
	dma_fence_put(job->fence);
	job->fence = dma_fence_get(fence);

	amdgpu_job_free_resources(job);
	return fence;
}
struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
	struct hl_device *hdev = ctx->hdev;
	struct dma_fence *fence;

	spin_lock(&ctx->cs_lock);

	if (seq >= ctx->cs_sequence) {
		dev_notice(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu\n",
			seq, ctx->cs_sequence);
		spin_unlock(&ctx->cs_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);
		spin_unlock(&ctx->cs_lock);
		return NULL;
	}

	fence = dma_fence_get(ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
	spin_unlock(&ctx->cs_lock);

	return fence;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct amdgpu_ring *ring, uint64_t seq)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = ctx->rings[ring->idx].sequence - 1;

	if (seq >= cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
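/*
 * Hedged sketch, not the actual amdgpu ioctl code: how a caller might
 * consume the reference returned by amdgpu_ctx_get_fence() above.  An
 * ERR_PTR means the sequence number was never emitted, a NULL return
 * means the slot was already recycled (the job signaled long ago), and
 * any real fence must be put again once the wait is done.  The function
 * name and the infinite timeout are illustrative.
 */
static int example_wait_on_seq(struct amdgpu_ctx *ctx,
			       struct amdgpu_ring *ring, uint64_t seq)
{
	struct dma_fence *fence;
	long r;

	fence = amdgpu_ctx_get_fence(ctx, ring, seq);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	if (!fence)
		return 0;	/* already signaled and recycled */

	r = dma_fence_wait_timeout(fence, true, MAX_SCHEDULE_TIMEOUT);
	dma_fence_put(fence);	/* drop the reference taken by get_fence */

	return r < 0 ? r : 0;
}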
int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity,
			   struct etnaviv_gem_submit *submit)
{
	int ret;

	ret = drm_sched_job_init(&submit->sched_job, &submit->gpu->sched,
				 sched_entity, submit->cmdbuf.ctx);
	if (ret)
		return ret;

	submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished);
	mutex_lock(&submit->gpu->fence_idr_lock);
	submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr,
						submit->out_fence, 0,
						INT_MAX, GFP_KERNEL);
	mutex_unlock(&submit->gpu->fence_idr_lock);
	if (submit->out_fence_id < 0)
		return -ENOMEM;

	/* the scheduler holds on to the job now */
	kref_get(&submit->refcount);

	drm_sched_entity_push_job(&submit->sched_job, sched_entity);

	return 0;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			  struct drm_sched_entity *entity,
			  struct dma_fence *fence, uint64_t *handle)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);
	if (handle)
		*handle = seq;
}
static void add_fence(struct dma_fence **fences,
		      int *i, struct dma_fence *fence)
{
	fences[*i] = fence;

	/*
	 * Only keep (and take a reference to) fences that are still
	 * unsignaled; a signaled fence left in slot *i is simply
	 * overwritten by the next add_fence() call.
	 */
	if (!dma_fence_is_signaled(fence)) {
		dma_fence_get(fence);
		(*i)++;
	}
}
/**
 * sync_file_create() - creates a sync file
 * @fence: fence to add to the sync_file
 *
 * Creates a sync_file containing @fence. This function acquires an additional
 * reference of @fence for the newly-created &sync_file, if it succeeds. The
 * sync_file can be released with fput(sync_file->file). Returns the
 * sync_file or NULL in case of error.
 */
struct sync_file *sync_file_create(struct dma_fence *fence)
{
	struct sync_file *sync_file;

	sync_file = sync_file_alloc();
	if (!sync_file)
		return NULL;

	sync_file->fence = dma_fence_get(fence);

	return sync_file;
}
static void
nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile,
			struct dma_fence *fence)
{
	struct nouveau_drm *drm = nouveau_drm(dev);

	if (tile) {
		spin_lock(&drm->tile.lock);
		tile->fence = (struct nouveau_fence *)dma_fence_get(fence);
		tile->used = false;
		spin_unlock(&drm->tile.lock);
	}
}
/**
 * uvd_v6_0_enc_get_destroy_msg - generate a UVD ENC destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: session handle to use
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so
 */
static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
					uint32_t handle,
					struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = 0x00010000;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
/**
 * sync_file_get_fence - get the fence related to the sync_file fd
 * @fd: sync_file fd to get the fence from
 *
 * Ensures @fd references a valid sync_file and returns a fence that
 * represents all fences in the sync_file. On error NULL is returned.
 */
struct dma_fence *sync_file_get_fence(int fd)
{
	struct sync_file *sync_file;
	struct dma_fence *fence;

	sync_file = sync_file_fdget(fd);
	if (!sync_file)
		return NULL;

	fence = dma_fence_get(sync_file->fence);
	fput(sync_file->file);

	return fence;
}
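/*
 * Hedged sketch of the matching driver-side "in-fence" import path:
 * userspace hands in an fd, sync_file_get_fence() returns a new fence
 * reference (or NULL for a bad fd), and the caller must drop that
 * reference when it is done.  The function name and the interruptible
 * wait are illustrative, not taken from any particular driver.
 */
static int example_wait_in_fence(int fd)
{
	struct dma_fence *fence = sync_file_get_fence(fd);
	long ret;

	if (!fence)
		return -EINVAL;

	ret = dma_fence_wait(fence, true);	/* 0 or -ERESTARTSYS */
	dma_fence_put(fence);			/* balances sync_file_get_fence() */

	return ret;
}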
/**
 * sync_file_create() - creates a sync file
 * @fence: fence to add to the sync_file
 *
 * Creates a sync_file containing @fence. This function acquires an additional
 * reference of @fence for the newly-created &sync_file, if it succeeds. The
 * sync_file can be released with fput(sync_file->file). Returns the
 * sync_file or NULL in case of error.
 */
struct sync_file *sync_file_create(struct dma_fence *fence)
{
	struct sync_file *sync_file;

	sync_file = sync_file_alloc();
	if (!sync_file)
		return NULL;

	sync_file->fence = dma_fence_get(fence);

	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
		 fence->ops->get_driver_name(fence),
		 fence->ops->get_timeline_name(fence), fence->context,
		 fence->seqno);

	return sync_file;
}
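/*
 * Hedged sketch of the usual "out-fence" export path in a driver
 * ioctl: reserve an fd first so the failure path stays trivial, wrap
 * the fence in a sync_file (which takes its own reference, as
 * documented above), then publish the fd to userspace.  The function
 * name is illustrative.
 */
static int example_export_fence(struct dma_fence *fence)
{
	struct sync_file *sync_file;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		return fd;

	sync_file = sync_file_create(fence);
	if (!sync_file) {
		put_unused_fd(fd);
		return -ENOMEM;
	}

	fd_install(fd, sync_file->file);
	return fd;
}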
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
		      struct amd_sched_entity *entity, void *owner,
		      struct dma_fence **f)
{
	int r;

	job->ring = ring;

	if (!f)
		return -EINVAL;

	r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
	if (r)
		return r;

	job->owner = owner;
	job->fence_ctx = entity->fence_context;
	*f = dma_fence_get(&job->base.s_fence->finished);
	amdgpu_job_free_resources(job);
	amd_sched_entity_push_job(&job->base);

	return 0;
}
int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity,
			   struct etnaviv_gem_submit *submit)
{
	int ret = 0;

	/*
	 * Hold the fence lock across the whole operation to avoid jobs being
	 * pushed out of order with regard to their sched fence seqnos as
	 * allocated in drm_sched_job_init.
	 */
	mutex_lock(&submit->gpu->fence_lock);

	ret = drm_sched_job_init(&submit->sched_job, sched_entity,
				 submit->ctx);
	if (ret)
		goto out_unlock;

	submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished);
	submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr,
						submit->out_fence, 0,
						INT_MAX, GFP_KERNEL);
	if (submit->out_fence_id < 0) {
		drm_sched_job_cleanup(&submit->sched_job);
		ret = -ENOMEM;
		goto out_unlock;
	}

	/* the scheduler holds on to the job now */
	kref_get(&submit->refcount);

	drm_sched_entity_push_job(&submit->sched_job, sched_entity);

out_unlock:
	mutex_unlock(&submit->gpu->fence_lock);

	return ret;
}
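/*
 * Hedged sketch of how the out_fence_id allocated above is typically
 * used later: a wait ioctl looks the fence up again in the IDR and
 * takes its own reference under the same lock that protects the IDR.
 * The function name is illustrative; dma_fence_get_rcu() is used so a
 * fence whose refcount already dropped to zero is simply not returned.
 */
static struct dma_fence *example_lookup_out_fence(struct etnaviv_gpu *gpu,
						  u32 id)
{
	struct dma_fence *fence;

	mutex_lock(&gpu->fence_lock);
	fence = idr_find(&gpu->fence_idr, id);
	if (fence)
		fence = dma_fence_get_rcu(fence);
	mutex_unlock(&gpu->fence_lock);

	return fence;
}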
/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @f: resulting fence object
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_fence *fence;
	struct dma_fence *old, **ptr;
	uint32_t seq;

	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
	if (fence == NULL)
		return -ENOMEM;

	seq = ++ring->fence_drv.sync_seq;
	fence->ring = ring;
	dma_fence_init(&fence->base, &amdgpu_fence_ops,
		       &ring->fence_drv.lock,
		       adev->fence_context + ring->idx,
		       seq);
	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       seq, AMDGPU_FENCE_FLAG_INT);

	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
	/* This function can't be called concurrently anyway, otherwise
	 * emitting the fence would mess up the hardware ring buffer.
	 */
	old = rcu_dereference_protected(*ptr, 1);
	if (old && !dma_fence_is_signaled(old)) {
		DRM_INFO("rcu slot is busy\n");
		dma_fence_wait(old, false);
	}

	rcu_assign_pointer(*ptr, dma_fence_get(&fence->base));

	*f = &fence->base;

	return 0;
}
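/*
 * Hedged, self-contained illustration of the slot indexing used by
 * amdgpu_fence_emit() above (and by the fixed-size fence rings in the
 * other snippets): with a power-of-two slot count and mask = count - 1,
 * "seq & mask" only maps two sequence numbers to the same slot when
 * they are a full ring apart, which is why the emit path only ever has
 * to wait for the fence that is a full ring of submissions old.  Plain
 * C; the numbers are illustrative.
 */
#include <assert.h>

static void example_slot_reuse(void)
{
	const unsigned int num_fences = 8;	/* must be a power of two */
	const unsigned int mask = num_fences - 1;

	/* seq and seq + num_fences share a slot... */
	assert((5u & mask) == ((5u + num_fences) & mask));
	/* ...while sequence numbers closer together never collide. */
	assert((5u & mask) != ((5u + num_fences - 1) & mask));
}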
/**
 * sync_file_merge() - merge two sync_files
 * @name: name of new fence
 * @a: sync_file a
 * @b: sync_file b
 *
 * Creates a new sync_file which contains copies of all the fences in both
 * @a and @b. @a and @b remain valid, independent sync_files. Returns the
 * new merged sync_file or NULL in case of error.
 */
static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
					 struct sync_file *b)
{
	struct sync_file *sync_file;
	struct dma_fence **fences, **nfences, **a_fences, **b_fences;
	int i, i_a, i_b, num_fences, a_num_fences, b_num_fences;

	sync_file = sync_file_alloc();
	if (!sync_file)
		return NULL;

	a_fences = get_fences(a, &a_num_fences);
	b_fences = get_fences(b, &b_num_fences);
	if (a_num_fences > INT_MAX - b_num_fences)
		goto err;

	num_fences = a_num_fences + b_num_fences;

	fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto err;

	/*
	 * Assume sync_file a and b are both ordered and have no
	 * duplicates with the same context.
	 *
	 * If a sync_file can only be created with sync_file_merge
	 * and sync_file_create, this is a reasonable assumption.
	 */
	for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) {
		struct dma_fence *pt_a = a_fences[i_a];
		struct dma_fence *pt_b = b_fences[i_b];

		if (pt_a->context < pt_b->context) {
			add_fence(fences, &i, pt_a);

			i_a++;
		} else if (pt_a->context > pt_b->context) {
			add_fence(fences, &i, pt_b);

			i_b++;
		} else {
			if (pt_a->seqno - pt_b->seqno <= INT_MAX)
				add_fence(fences, &i, pt_a);
			else
				add_fence(fences, &i, pt_b);

			i_a++;
			i_b++;
		}
	}

	for (; i_a < a_num_fences; i_a++)
		add_fence(fences, &i, a_fences[i_a]);

	for (; i_b < b_num_fences; i_b++)
		add_fence(fences, &i, b_fences[i_b]);

	if (i == 0)
		fences[i++] = dma_fence_get(a_fences[0]);

	if (num_fences > i) {
		nfences = krealloc(fences, i * sizeof(*fences), GFP_KERNEL);
		if (!nfences)
			goto err;

		fences = nfences;
	}

	if (sync_file_set_fence(sync_file, fences, i) < 0) {
		kfree(fences);
		goto err;
	}

	strlcpy(sync_file->name, name, sizeof(sync_file->name));
	return sync_file;

err:
	fput(sync_file->file);
	return NULL;
}
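/*
 * Hedged userspace sketch: sync_file_merge() above is the kernel side
 * of the SYNC_IOC_MERGE ioctl, so two fence fds can be combined from
 * userspace roughly like this.  Assumes the uapi definitions from
 * <linux/sync_file.h>; error handling is trimmed and the fence name is
 * illustrative.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/sync_file.h>

static int example_merge_fence_fds(int fd1, int fd2)
{
	struct sync_merge_data data;

	memset(&data, 0, sizeof(data));
	strncpy(data.name, "merged", sizeof(data.name) - 1);
	data.fd2 = fd2;

	if (ioctl(fd1, SYNC_IOC_MERGE, &data) < 0)
		return -1;

	return data.fence;	/* new fd carrying fences from both inputs */
}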
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
			     unsigned int flags)
{
	struct clflush *clflush;

	/*
	 * Stolen memory is always coherent with the GPU as it is explicitly
	 * marked as wc by the system, or the system is cache-coherent.
	 * Similarly, we only access struct pages through the CPU cache, so
	 * anything not backed by physical memory we consider to be always
	 * coherent and not need clflushing.
	 */
	if (!i915_gem_object_has_struct_page(obj)) {
		obj->cache_dirty = false;
		return false;
	}

	/* If the GPU is snooping the contents of the CPU cache,
	 * we do not need to manually clear the CPU cache lines. However,
	 * the caches are only snooped when the render cache is
	 * flushed/invalidated. As we always have to emit invalidations
	 * and flushes when moving into and out of the RENDER domain, correct
	 * snooping behaviour occurs naturally as the result of our domain
	 * tracking.
	 */
	if (!(flags & I915_CLFLUSH_FORCE) &&
	    obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
		return false;

	trace_i915_gem_object_clflush(obj);

	clflush = NULL;
	if (!(flags & I915_CLFLUSH_SYNC))
		clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
	if (clflush) {
		GEM_BUG_ON(!obj->cache_dirty);

		dma_fence_init(&clflush->dma,
			       &i915_clflush_ops,
			       &clflush_lock,
			       to_i915(obj->base.dev)->mm.unordered_timeline,
			       0);
		i915_sw_fence_init(&clflush->wait, i915_clflush_notify);

		clflush->obj = i915_gem_object_get(obj);
		INIT_WORK(&clflush->work, i915_clflush_work);

		dma_fence_get(&clflush->dma);

		i915_sw_fence_await_reservation(&clflush->wait,
						obj->resv, NULL, true,
						I915_FENCE_TIMEOUT,
						I915_FENCE_GFP);

		reservation_object_lock(obj->resv, NULL);
		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
		reservation_object_unlock(obj->resv);

		i915_sw_fence_commit(&clflush->wait);
	} else if (obj->mm.pages) {
		__i915_do_clflush(obj);
	} else {
		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
	}

	obj->cache_dirty = false;
	return true;
}
static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
{
	struct v3d_job *job = to_v3d_job(sched_job);
	struct v3d_exec_info *exec = job->exec;
	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
	struct v3d_dev *v3d = exec->v3d;
	struct drm_device *dev = &v3d->drm;
	struct dma_fence *fence;
	unsigned long irqflags;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/* Lock required around bin_job update vs
	 * v3d_overflow_mem_work().
	 */
	spin_lock_irqsave(&v3d->job_lock, irqflags);
	if (q == V3D_BIN) {
		v3d->bin_job = job->exec;

		/* Clear out the overflow allocation, so we don't
		 * reuse the overflow attached to a previous job.
		 */
		V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
	} else {
		v3d->render_job = job->exec;
	}
	spin_unlock_irqrestore(&v3d->job_lock, irqflags);

	/* Can we avoid this flush when q==RENDER?  We need to be
	 * careful of scheduling, though -- imagine job0 rendering to
	 * texture and job1 reading, and them being executed as bin0,
	 * bin1, render0, render1, so that render1's flush at bin time
	 * wasn't enough.
	 */
	v3d_invalidate_caches(v3d);

	fence = v3d_fence_create(v3d, q);
	if (IS_ERR(fence))
		return NULL;

	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
			    job->start, job->end);

	if (q == V3D_BIN) {
		if (exec->qma) {
			V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
			V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
		}
		if (exec->qts) {
			V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
				       V3D_CLE_CT0QTS_ENABLE |
				       exec->qts);
		}
	} else {
		/* XXX: Set the QCFG */
	}

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
	V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);

	return fence;
}