uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
			      struct fence *fence)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	uint64_t seq = cring->sequence;
	unsigned idx = 0;
	struct fence *other = NULL;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = cring->fences[idx];
	if (other) {
		signed long r;

		r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
		if (r < 0)
			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
	}

	fence_get(fence);

	spin_lock(&ctx->ring_lock);
	cring->fences[idx] = fence;
	cring->sequence++;
	spin_unlock(&ctx->ring_lock);

	fence_put(other);

	return seq;
}
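The slot index above only behaves like a modulo because amdgpu_sched_jobs is kept a power of two. A minimal standalone sketch of that masking trick, not driver code; fence_slot is a hypothetical name:

#include <assert.h>
#include <stdint.h>

/* hypothetical helper: for power-of-two sizes, seq & (size - 1) == seq % size */
static unsigned int fence_slot(uint64_t seq, unsigned int size)
{
	assert(size != 0 && (size & (size - 1)) == 0);	/* size must be 2^n */
	return (unsigned int)(seq & (size - 1));	/* same value as seq % size */
}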
static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
{
	struct amdgpu_fence *fence = NULL;
	struct amdgpu_job *job;
	int r;

	if (!sched_job) {
		DRM_ERROR("job is null\n");
		return NULL;
	}
	job = to_amdgpu_job(sched_job);
	mutex_lock(&job->job_lock);
	r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs,
			       job->base.owner);
	if (r) {
		DRM_ERROR("Error scheduling IBs (%d)\n", r);
		goto err;
	}

	fence = job->ibs[job->num_ibs - 1].fence;
	fence_get(&fence->base);

err:
	if (job->free_job)
		job->free_job(job);

	mutex_unlock(&job->job_lock);
	fence_put(&job->base.s_fence->base);
	kfree(job);
	return fence ? &fence->base : NULL;
}
void reservation_object_add_excl_fence(struct reservation_object *obj,
				       struct fence *fence)
{
	struct fence *old_fence = reservation_object_get_excl(obj);
	struct reservation_object_list *old;
	u32 i = 0;

	old = reservation_object_get_list(obj);
	if (old)
		i = old->shared_count;

	if (fence)
		fence_get(fence);

	preempt_disable();
	write_seqcount_begin(&obj->seq);
	/* write_seqcount_begin provides the necessary memory barrier */
	RCU_INIT_POINTER(obj->fence_excl, fence);
	if (old)
		old->shared_count = 0;
	write_seqcount_end(&obj->seq);
	preempt_enable();

	/* inplace update, no shared fences */
	while (i--)
		fence_put(rcu_dereference_protected(old->shared[i],
						    reservation_object_held(obj)));

	if (old_fence)
		fence_put(old_fence);
}
static void
reservation_object_add_shared_replace(struct reservation_object *obj,
				      struct reservation_object_list *old,
				      struct reservation_object_list *fobj,
				      struct fence *fence)
{
	unsigned i;
	struct fence *old_fence = NULL;

	fence_get(fence);

	if (!old) {
		RCU_INIT_POINTER(fobj->shared[0], fence);
		fobj->shared_count = 1;
		goto done;
	}

	/*
	 * no need to bump fence refcounts, rcu_read access
	 * requires the use of kref_get_unless_zero, and the
	 * references from the old struct are carried over to
	 * the new.
	 */
	fobj->shared_count = old->shared_count;

	for (i = 0; i < old->shared_count; ++i) {
		struct fence *check;

		check = rcu_dereference_protected(old->shared[i],
						  reservation_object_held(obj));

		if (!old_fence && check->context == fence->context) {
			old_fence = check;
			RCU_INIT_POINTER(fobj->shared[i], fence);
		} else
			RCU_INIT_POINTER(fobj->shared[i], check);
	}
	if (!old_fence) {
		RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
		fobj->shared_count++;
	}

done:
	preempt_disable();
	write_seqcount_begin(&obj->seq);
	/*
	 * RCU_INIT_POINTER can be used here,
	 * seqcount provides the necessary barriers
	 */
	RCU_INIT_POINTER(obj->fence, fobj);
	write_seqcount_end(&obj->seq);
	preempt_enable();

	if (old)
		kfree_rcu(old, rcu);

	if (old_fence)
		fence_put(old_fence);
}
static void add_fence(struct fence **fences, int *i, struct fence *fence)
{
	fences[*i] = fence;

	if (!fence_is_signaled(fence)) {
		fence_get(fence);
		(*i)++;
	}
}
int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
					 struct amdgpu_ring *ring,
					 struct amdgpu_ib *ibs,
					 unsigned num_ibs,
					 int (*free_job)(struct amdgpu_job *),
					 void *owner,
					 struct fence **f)
{
	int r = 0;

	if (amdgpu_enable_scheduler) {
		struct amdgpu_job *job =
			kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
		if (!job)
			return -ENOMEM;
		job->base.sched = &ring->sched;
		job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
		job->adev = adev;
		job->ibs = ibs;
		job->num_ibs = num_ibs;
		job->base.owner = owner;
		mutex_init(&job->job_lock);
		job->free_job = free_job;
		mutex_lock(&job->job_lock);
		r = amd_sched_entity_push_job(&job->base);
		if (r) {
			mutex_unlock(&job->job_lock);
			kfree(job);
			return r;
		}
		*f = fence_get(&job->base.s_fence->base);
		mutex_unlock(&job->job_lock);
	} else {
		r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
		if (r)
			return r;
		*f = fence_get(&ibs[num_ibs - 1].fence->base);
	}
	return 0;
}
/**
 * sync_file_get_fence - get the fence related to the sync_file fd
 * @fd: sync_file fd to get the fence from
 *
 * Ensures @fd references a valid sync_file and returns a fence that
 * represents all fences in the sync_file. On error NULL is returned.
 */
struct fence *sync_file_get_fence(int fd)
{
	struct sync_file *sync_file;
	struct fence *fence;

	sync_file = sync_file_fdget(fd);
	if (!sync_file)
		return NULL;

	fence = fence_get(sync_file->fence);
	fput(sync_file->file);

	return fence;
}
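A minimal sketch of how a caller might consume this on an explicit-sync import path, where userspace hands in a sync_file fd; import_and_wait is a hypothetical name, only sync_file_get_fence(), fence_wait() and fence_put() are real calls:

#include <linux/fence.h>
#include <linux/sync_file.h>

/* hypothetical caller: turn a sync_file fd into a fence, wait on it, drop the ref */
static int import_and_wait(int fd)
{
	struct fence *fence = sync_file_get_fence(fd);
	long r;

	if (!fence)
		return -EINVAL;		/* fd was not a valid sync_file */

	r = fence_wait(fence, true);	/* interruptible; 0 on success, negative on error */
	fence_put(fence);		/* drop the reference taken by sync_file_get_fence() */

	return r;
}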
static int reservation_cb_add_fence_cb(struct drm_reservation_cb *rcb,
				       struct fence *fence)
{
	int ret = 0;
	struct drm_reservation_fence_cb *fence_cb;
	struct drm_reservation_fence_cb **new_fence_cbs;

	new_fence_cbs = krealloc(rcb->fence_cbs,
				 (rcb->num_fence_cbs + 1) *
				 sizeof(struct drm_reservation_fence_cb *),
				 GFP_KERNEL);
	if (!new_fence_cbs)
		return -ENOMEM;
	rcb->fence_cbs = new_fence_cbs;

	fence_cb = kzalloc(sizeof(struct drm_reservation_fence_cb), GFP_KERNEL);
	if (!fence_cb)
		return -ENOMEM;

	/*
	 * we do not want the fence to disappear on us while we are waiting
	 * for the callback, and we need it in case we want to remove callbacks
	 */
	fence_get(fence);
	fence_cb->fence = fence;
	fence_cb->parent = rcb;
	rcb->fence_cbs[rcb->num_fence_cbs] = fence_cb;
	atomic_inc(&rcb->count);
	ret = fence_add_callback(fence, &fence_cb->base,
				 reservation_cb_fence_cb);
	if (ret == -ENOENT) {
		/* already signaled */
		atomic_dec(&rcb->count);
		fence_put(fence_cb->fence);
		kfree(fence_cb);
		ret = 0;
	} else if (ret < 0) {
		atomic_dec(&rcb->count);
		fence_put(fence_cb->fence);
		kfree(fence_cb);
		return ret;
	} else {
		rcb->num_fence_cbs++;
	}
	return ret;
}
/**
 * radeon_fence_enable_signaling - enable signalling on fence
 * @fence: fence
 *
 * This function is called with fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
static bool radeon_fence_enable_signaling(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;

	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
		return false;

//	if (down_read_trylock(&rdev->exclusive_lock)) {
	radeon_irq_kms_sw_irq_get(rdev, fence->ring);

//	if (radeon_fence_activity(rdev, fence->ring))
//		wake_up_all_locked(&rdev->fence_queue);

	/* did fence get signaled after we enabled the sw irq? */
	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
//		up_read(&rdev->exclusive_lock);
		return false;
	}

//	up_read(&rdev->exclusive_lock);
//	} else {
		/* we're probably in a lockup, lets not fiddle too much */
//		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
//			rdev->fence_drv[fence->ring].delayed_irq = true;
//		radeon_fence_schedule_check(rdev, fence->ring);
//	}

//	fence->fence_wake.flags = 0;
//	fence->fence_wake.private = NULL;
	fence->fence_wake.func = radeon_fence_check_signaled;
	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
	fence_get(f);

	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
	return true;
}
static void
reservation_object_add_shared_inplace(struct reservation_object *obj,
				      struct reservation_object_list *fobj,
				      struct fence *fence)
{
	u32 i;

	fence_get(fence);

	preempt_disable();
	write_seqcount_begin(&obj->seq);

	for (i = 0; i < fobj->shared_count; ++i) {
		struct fence *old_fence;

		old_fence = rcu_dereference_protected(fobj->shared[i],
						      reservation_object_held(obj));

		if (old_fence->context == fence->context) {
			/* memory barrier is added by write_seqcount_begin */
			RCU_INIT_POINTER(fobj->shared[i], fence);
			write_seqcount_end(&obj->seq);
			preempt_enable();

			fence_put(old_fence);
			return;
		}
	}

	/*
	 * memory barrier is added by write_seqcount_begin,
	 * fobj->shared_count is protected by this lock too
	 */
	RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
	fobj->shared_count++;

	write_seqcount_end(&obj->seq);
	preempt_enable();
}
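Both shared-fence paths above are normally reached through reservation_object_add_shared_fence() with the object's ww_mutex held, after a slot has been reserved. A hedged sketch of that calling convention; example_attach_shared is a hypothetical name, the two reservation_object_* calls are real:

#include <linux/reservation.h>

/* hypothetical caller: attach a shared fence, with the object's ww_mutex already held */
static int example_attach_shared(struct reservation_object *obj,
				 struct fence *fence)
{
	int r;

	r = reservation_object_reserve_shared(obj);	/* make room for one more shared slot */
	if (r)
		return r;

	reservation_object_add_shared_fence(obj, fence);	/* picks the inplace or replace path */
	return 0;
}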
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
		      struct amd_sched_entity *entity, void *owner,
		      struct fence **f)
{
	int r;

	job->ring = ring;

	if (!f)
		return -EINVAL;

	r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
	if (r)
		return r;

	job->owner = owner;
	job->ctx = entity->fence_context;
	*f = fence_get(&job->base.s_fence->finished);
	amdgpu_job_free_resources(job);
	amd_sched_entity_push_job(&job->base);

	return 0;
}
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				   struct amdgpu_ring *ring, uint64_t seq)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	struct fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq >= cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
		      struct amd_sched_entity *entity, void *owner,
		      struct fence **f)
{
	struct fence *fence;
	int r;

	job->ring = ring;

	if (!f)
		return -EINVAL;

	r = amd_sched_job_init(&job->base, &ring->sched,
			       entity, amdgpu_job_timeout_func,
			       amdgpu_job_free_func, owner, &fence);
	if (r)
		return r;

	job->owner = owner;
	job->ctx = entity->fence_context;
	*f = fence_get(fence);
	amd_sched_entity_push_job(&job->base);

	return 0;
}
/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @f: resulting fence object
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_fence *fence;
	struct fence *old, **ptr;
	uint32_t seq;

	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
	if (fence == NULL)
		return -ENOMEM;

	seq = ++ring->fence_drv.sync_seq;
	fence->ring = ring;
	fence_init(&fence->base, &amdgpu_fence_ops,
		   &ring->fence_drv.lock,
		   adev->fence_context + ring->idx,
		   seq);
	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       seq, AMDGPU_FENCE_FLAG_INT);

	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
	/* This function can't be called concurrently anyway, otherwise
	 * emitting the fence would mess up the hardware ring buffer.
	 */
	old = rcu_dereference_protected(*ptr, 1);
	if (old && !fence_is_signaled(old)) {
		DRM_INFO("rcu slot is busy\n");
		fence_wait(old, false);
	}

	rcu_assign_pointer(*ptr, fence_get(&fence->base));

	*f = &fence->base;

	return 0;
}
/**
 * sync_file_merge() - merge two sync_files
 * @name: name of new fence
 * @a: sync_file a
 * @b: sync_file b
 *
 * Creates a new sync_file which contains copies of all the fences in both
 * @a and @b. @a and @b remain valid, independent sync_files. Returns the
 * new merged sync_file or NULL in case of error.
 */
static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
					 struct sync_file *b)
{
	struct sync_file *sync_file;
	struct fence **fences, **nfences, **a_fences, **b_fences;
	int i, i_a, i_b, num_fences, a_num_fences, b_num_fences;

	sync_file = sync_file_alloc();
	if (!sync_file)
		return NULL;

	a_fences = get_fences(a, &a_num_fences);
	b_fences = get_fences(b, &b_num_fences);
	if (a_num_fences > INT_MAX - b_num_fences)
		goto err;

	num_fences = a_num_fences + b_num_fences;

	fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto err;

	/*
	 * Assume sync_file a and b are both ordered and have no
	 * duplicates with the same context.
	 *
	 * If a sync_file can only be created with sync_file_merge
	 * and sync_file_create, this is a reasonable assumption.
	 */
	for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) {
		struct fence *pt_a = a_fences[i_a];
		struct fence *pt_b = b_fences[i_b];

		if (pt_a->context < pt_b->context) {
			add_fence(fences, &i, pt_a);

			i_a++;
		} else if (pt_a->context > pt_b->context) {
			add_fence(fences, &i, pt_b);

			i_b++;
		} else {
			if (pt_a->seqno - pt_b->seqno <= INT_MAX)
				add_fence(fences, &i, pt_a);
			else
				add_fence(fences, &i, pt_b);

			i_a++;
			i_b++;
		}
	}

	for (; i_a < a_num_fences; i_a++)
		add_fence(fences, &i, a_fences[i_a]);

	for (; i_b < b_num_fences; i_b++)
		add_fence(fences, &i, b_fences[i_b]);

	if (i == 0)
		fences[i++] = fence_get(a_fences[0]);

	if (num_fences > i) {
		nfences = krealloc(fences, i * sizeof(*fences), GFP_KERNEL);
		if (!nfences)
			goto err;

		fences = nfences;
	}

	if (sync_file_set_fence(sync_file, fences, i) < 0) {
		kfree(fences);
		goto err;
	}

	strlcpy(sync_file->name, name, sizeof(sync_file->name));
	return sync_file;

err:
	fput(sync_file->file);
	return NULL;
}
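The same-context branch above picks the later of the two fences with an unsigned subtraction, so the choice stays correct across a 32-bit seqno wrap. A standalone sketch of that comparison in isolation; seqno_is_later_or_equal is a hypothetical name:

#include <stdbool.h>
#include <stdint.h>

/* hypothetical helper: true when a is at or after b, even across a u32 wrap */
static bool seqno_is_later_or_equal(uint32_t a, uint32_t b)
{
	/* unsigned subtraction wraps, so a "huge" difference means a is earlier */
	return a - b <= INT32_MAX;
}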
/**
 * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
 *
 * @ring: ring the IBs are scheduled on
 * @num_ibs: number of IBs to schedule
 * @ibs: IB objects to schedule
 * @last_vm_update: fence of the last VM update
 * @job: job the IBs belong to, or NULL for a direct submit (e.g. ring tests)
 * @f: fence created during this submission
 *
 * Schedule an IB on the associated ring (all asics).
 * Returns 0 on success, error on failure.
 *
 * On SI, there are two parallel engines fed from the primary ring,
 * the CE (Constant Engine) and the DE (Drawing Engine). Since
 * resource descriptors have moved to memory, the CE allows you to
 * prime the caches while the DE is updating register state so that
 * the resource descriptors will be already in cache when the draw is
 * processed. To accomplish this, the userspace driver submits two
 * IBs, one for the CE and one for the DE. If there is a CE IB (called
 * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
 * to SI there was just a DE IB.
 */
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
		       struct amdgpu_job *job, struct fence **f)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib *ib = &ibs[0];
	bool skip_preamble, need_ctx_switch;
	unsigned patch_offset = ~0;
	struct amdgpu_vm *vm;
	struct fence *hwf;
	uint64_t ctx;
	unsigned i;
	int r = 0;

	if (num_ibs == 0)
		return -EINVAL;

	/* ring tests don't use a job */
	if (job) {
		vm = job->vm;
		ctx = job->ctx;
	} else {
		vm = NULL;
		ctx = 0;
	}

	if (!ring->ready) {
		dev_err(adev->dev, "couldn't schedule ib\n");
		return -EINVAL;
	}

	if (vm && !job->vm_id) {
		dev_err(adev->dev, "VM IB without ID\n");
		return -EINVAL;
	}

	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
	if (r) {
		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
		return r;
	}

	if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (vm) {
		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
				    job->gds_base, job->gds_size,
				    job->gws_base, job->gws_size,
				    job->oa_base, job->oa_size);
		if (r) {
			amdgpu_ring_undo(ring);
			return r;
		}
	}

	if (ring->funcs->emit_hdp_flush)
		amdgpu_ring_emit_hdp_flush(ring);

	/* always set cond_exec_polling to CONTINUE */
	*ring->cond_exe_cpu_addr = 1;

	skip_preamble = ring->current_ctx == ctx;
	need_ctx_switch = ring->current_ctx != ctx;
	for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

		/* drop preamble IBs if we don't have a context switch */
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
			continue;

		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
				    need_ctx_switch);
		need_ctx_switch = false;
	}

	if (ring->funcs->emit_hdp_invalidate)
		amdgpu_ring_emit_hdp_invalidate(ring);

	r = amdgpu_fence_emit(ring, &hwf);
	if (r) {
		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
		if (job && job->vm_id)
			amdgpu_vm_reset_id(adev, job->vm_id);
		amdgpu_ring_undo(ring);
		return r;
	}

	/* wrap the last IB with fence */
	if (job && job->uf_bo) {
		uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);

		addr += job->uf_offset;
		amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
				       AMDGPU_FENCE_FLAG_64BIT);
	}

	if (f)
		*f = fence_get(hwf);

	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	ring->current_ctx = ctx;
	amdgpu_ring_commit(ring);
	return 0;
}
/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	fence_get(&fence->base);
	return fence;
}
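A small hedged sketch of the matching put side, pairing radeon_fence_ref() with radeon_fence_unref() around a wait; example_wait_keep is a hypothetical name, the radeon_fence_* calls are real:

/* hypothetical helper: keep the fence alive across the wait, then drop the extra ref */
static int example_wait_keep(struct radeon_fence *fence)
{
	struct radeon_fence *ref = radeon_fence_ref(fence);
	int r = radeon_fence_wait(ref, false);	/* uninterruptible wait */

	radeon_fence_unref(&ref);	/* drops the reference and clears the pointer */
	return r;
}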