Example #1
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
			      struct fence *fence)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	uint64_t seq = cring->sequence;
	unsigned idx = 0;
	struct fence *other = NULL;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = cring->fences[idx];
	if (other) {
		signed long r;
		r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
		if (r < 0)
			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
	}

	fence_get(fence);

	spin_lock(&ctx->ring_lock);
	cring->fences[idx] = fence;
	cring->sequence++;
	spin_unlock(&ctx->ring_lock);

	fence_put(other);

	return seq;
}
Example #2
static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
{
	struct amdgpu_fence *fence = NULL;
	struct amdgpu_job *job;
	int r;

	if (!sched_job) {
		DRM_ERROR("job is null\n");
		return NULL;
	}
	job = to_amdgpu_job(sched_job);
	mutex_lock(&job->job_lock);
	r = amdgpu_ib_schedule(job->adev,
			       job->num_ibs,
			       job->ibs,
			       job->base.owner);
	if (r) {
		DRM_ERROR("Error scheduling IBs (%d)\n", r);
		goto err;
	}

	fence = job->ibs[job->num_ibs - 1].fence;
	fence_get(&fence->base);

err:
	if (job->free_job)
		job->free_job(job);

	mutex_unlock(&job->job_lock);
	fence_put(&job->base.s_fence->base);
	kfree(job);
	return fence ? &fence->base : NULL;
}
Example #3
void reservation_object_add_excl_fence(struct reservation_object *obj,
				       struct fence *fence)
{
	struct fence *old_fence = reservation_object_get_excl(obj);
	struct reservation_object_list *old;
	u32 i = 0;

	old = reservation_object_get_list(obj);
	if (old)
		i = old->shared_count;

	if (fence)
		fence_get(fence);

	preempt_disable();
	write_seqcount_begin(&obj->seq);
	/* write_seqcount_begin provides the necessary memory barrier */
	RCU_INIT_POINTER(obj->fence_excl, fence);
	if (old)
		old->shared_count = 0;
	write_seqcount_end(&obj->seq);
	preempt_enable();

	/* inplace update, no shared fences */
	while (i--)
		fence_put(rcu_dereference_protected(old->shared[i],
						reservation_object_held(obj)));

	if (old_fence)
		fence_put(old_fence);
}
Example #4
static void
reservation_object_add_shared_replace(struct reservation_object *obj,
				      struct reservation_object_list *old,
				      struct reservation_object_list *fobj,
				      struct fence *fence)
{
	unsigned i;
	struct fence *old_fence = NULL;

	fence_get(fence);

	if (!old) {
		RCU_INIT_POINTER(fobj->shared[0], fence);
		fobj->shared_count = 1;
		goto done;
	}

	/*
	 * no need to bump fence refcounts, rcu_read access
	 * requires the use of kref_get_unless_zero, and the
	 * references from the old struct are carried over to
	 * the new.
	 */
	fobj->shared_count = old->shared_count;

	for (i = 0; i < old->shared_count; ++i) {
		struct fence *check;

		check = rcu_dereference_protected(old->shared[i],
						reservation_object_held(obj));

		if (!old_fence && check->context == fence->context) {
			old_fence = check;
			RCU_INIT_POINTER(fobj->shared[i], fence);
		} else
			RCU_INIT_POINTER(fobj->shared[i], check);
	}
	if (!old_fence) {
		RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
		fobj->shared_count++;
	}

done:
	preempt_disable();
	write_seqcount_begin(&obj->seq);
	/*
	 * RCU_INIT_POINTER can be used here,
	 * seqcount provides the necessary barriers
	 */
	RCU_INIT_POINTER(obj->fence, fobj);
	write_seqcount_end(&obj->seq);
	preempt_enable();

	if (old)
		kfree_rcu(old, rcu);

	if (old_fence)
		fence_put(old_fence);
}
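
Both reservation_object_add_shared_replace() above and the in-place variant shown in a later example are internals of the public reservation_object_add_shared_fence() helper; drivers call the public entry points while holding the object's ww_mutex (the lock that reservation_object_held() checks). A minimal caller-side sketch, with example_attach_shared_fence as a hypothetical name:

#include <linux/fence.h>
#include <linux/reservation.h>

static int example_attach_shared_fence(struct reservation_object *obj,
				       struct fence *fence)
{
	int r;

	/* reservation_object_held() in the helpers checks exactly this lock */
	r = ww_mutex_lock(&obj->lock, NULL);
	if (r)
		return r;

	/* make sure a shared slot exists, then publish the fence */
	r = reservation_object_reserve_shared(obj);
	if (!r)
		reservation_object_add_shared_fence(obj, fence);

	ww_mutex_unlock(&obj->lock);
	return r;
}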
Example #5
File: sync_file.c Project: acton393/linux
static void add_fence(struct fence **fences, int *i, struct fence *fence)
{
	fences[*i] = fence;

	if (!fence_is_signaled(fence)) {
		fence_get(fence);
		(*i)++;
	}
}
Example #6
int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
					 struct amdgpu_ring *ring,
					 struct amdgpu_ib *ibs,
					 unsigned num_ibs,
					 int (*free_job)(struct amdgpu_job *),
					 void *owner,
					 struct fence **f)
{
	int r = 0;
	if (amdgpu_enable_scheduler) {
		struct amdgpu_job *job =
			kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
		if (!job)
			return -ENOMEM;
		job->base.sched = &ring->sched;
		job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
		job->adev = adev;
		job->ibs = ibs;
		job->num_ibs = num_ibs;
		job->base.owner = owner;
		mutex_init(&job->job_lock);
		job->free_job = free_job;
		mutex_lock(&job->job_lock);
		r = amd_sched_entity_push_job(&job->base);
		if (r) {
			mutex_unlock(&job->job_lock);
			kfree(job);
			return r;
		}
		*f = fence_get(&job->base.s_fence->base);
		mutex_unlock(&job->job_lock);
	} else {
		r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
		if (r)
			return r;
		*f = fence_get(&ibs[num_ibs - 1].fence->base);
	}

	return 0;
}
Example #7
File: sync_file.c Project: acton393/linux
/**
 * sync_file_get_fence - get the fence related to the sync_file fd
 * @fd:		sync_file fd to get the fence from
 *
 * Ensures @fd references a valid sync_file and returns a fence that
 * represents all fences in the sync_file. On error NULL is returned.
 */
struct fence *sync_file_get_fence(int fd)
{
	struct sync_file *sync_file;
	struct fence *fence;

	sync_file = sync_file_fdget(fd);
	if (!sync_file)
		return NULL;

	fence = fence_get(sync_file->fence);
	fput(sync_file->file);

	return fence;
}
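
A caller-side sketch of the helper above (example_wait_on_sync_file is a made-up name): the reference taken by fence_get() inside sync_file_get_fence() is owned by the caller and must be released with fence_put() after the wait. This assumes the pre-4.10 "struct fence" API used throughout these examples.

#include <linux/errno.h>
#include <linux/fence.h>
#include <linux/jiffies.h>
#include <linux/sync_file.h>

static int example_wait_on_sync_file(int fd)
{
	struct fence *fence;
	signed long r;

	fence = sync_file_get_fence(fd);	/* takes its own reference */
	if (!fence)
		return -EINVAL;

	r = fence_wait_timeout(fence, true, msecs_to_jiffies(1000));
	fence_put(fence);			/* balance the reference */

	if (r == 0)
		return -ETIMEDOUT;
	return r < 0 ? r : 0;
}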
Example #8
static int
reservation_cb_add_fence_cb(struct drm_reservation_cb *rcb, struct fence *fence)
{
	int ret = 0;
	struct drm_reservation_fence_cb *fence_cb;
	struct drm_reservation_fence_cb **new_fence_cbs;

	new_fence_cbs = krealloc(rcb->fence_cbs,
				(rcb->num_fence_cbs + 1)
				* sizeof(struct drm_reservation_fence_cb *),
				GFP_KERNEL);
	if (!new_fence_cbs)
		return -ENOMEM;
	rcb->fence_cbs = new_fence_cbs;

	fence_cb = kzalloc(sizeof(struct drm_reservation_fence_cb), GFP_KERNEL);
	if (!fence_cb)
		return -ENOMEM;

	/*
	 * we do not want the fence to disappear on us while we are waiting
	 * for the callback, and we also need it in case we later want to
	 * remove the callbacks again
	 */
	fence_get(fence);
	fence_cb->fence = fence;
	fence_cb->parent = rcb;
	rcb->fence_cbs[rcb->num_fence_cbs] = fence_cb;
	atomic_inc(&rcb->count);
	ret = fence_add_callback(fence, &fence_cb->base,
					reservation_cb_fence_cb);
	if (ret == -ENOENT) {
		/* already signaled */
		atomic_dec(&rcb->count);
		fence_put(fence_cb->fence);
		kfree(fence_cb);
		ret = 0;
	} else if (ret < 0) {
		atomic_dec(&rcb->count);
		fence_put(fence_cb->fence);
		kfree(fence_cb);
		return ret;
	} else {
		rcb->num_fence_cbs++;
	}
	return ret;
}
Example #9
/**
 * radeon_fence_enable_signaling - enable signalling on fence
 * @fence: fence
 *
 * This function is called with fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
static bool radeon_fence_enable_signaling(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;

	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
		return false;

//   if (down_read_trylock(&rdev->exclusive_lock))
	{
		radeon_irq_kms_sw_irq_get(rdev, fence->ring);

//       if (radeon_fence_activity(rdev, fence->ring))
//           wake_up_all_locked(&rdev->fence_queue);

		/* did fence get signaled after we enabled the sw irq? */
		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
//           up_read(&rdev->exclusive_lock);
			return false;
		}

//       up_read(&rdev->exclusive_lock);
//   } else {
		/* we're probably in a lockup, lets not fiddle too much */
//       if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
//           rdev->fence_drv[fence->ring].delayed_irq = true;
//       radeon_fence_schedule_check(rdev, fence->ring);
	}

//	fence->fence_wake.flags = 0;
//	fence->fence_wake.private = NULL;
	fence->fence_wake.func = radeon_fence_check_signaled;
	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
	fence_get(f);

	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
	return true;
}
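
The hook above is only reached through struct fence_ops: the fence core calls .enable_signaling the first time a waiter or callback is attached, which is why the function takes an extra fence_get() that is expected to be dropped by the wait-queue callback once the fence signals. A minimal sketch of that wiring, using hypothetical example_* placeholders (not the radeon source; since the hook is static, the real table lives next to it in radeon_fence.c):

#include <linux/fence.h>

/* Placeholder callbacks; a real driver reports its own names here. */
static const char *example_get_driver_name(struct fence *f)
{
	return "example";
}

static const char *example_get_timeline_name(struct fence *f)
{
	return "example-ring";
}

static const struct fence_ops example_fence_ops = {
	.get_driver_name	= example_get_driver_name,
	.get_timeline_name	= example_get_timeline_name,
	.enable_signaling	= radeon_fence_enable_signaling,
	.wait			= fence_default_wait,
};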
Example #10
static void
reservation_object_add_shared_inplace(struct reservation_object *obj,
				      struct reservation_object_list *fobj,
				      struct fence *fence)
{
	u32 i;

	fence_get(fence);

	preempt_disable();
	write_seqcount_begin(&obj->seq);

	for (i = 0; i < fobj->shared_count; ++i) {
		struct fence *old_fence;

		old_fence = rcu_dereference_protected(fobj->shared[i],
						reservation_object_held(obj));

		if (old_fence->context == fence->context) {
			/* memory barrier is added by write_seqcount_begin */
			RCU_INIT_POINTER(fobj->shared[i], fence);
			write_seqcount_end(&obj->seq);
			preempt_enable();

			fence_put(old_fence);
			return;
		}
	}

	/*
	 * memory barrier is added by write_seqcount_begin,
	 * fobj->shared_count is protected by this lock too
	 */
	RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
	fobj->shared_count++;

	write_seqcount_end(&obj->seq);
	preempt_enable();
}
Example #11
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
		      struct amd_sched_entity *entity, void *owner,
		      struct fence **f)
{
	int r;
	job->ring = ring;

	if (!f)
		return -EINVAL;

	r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
	if (r)
		return r;

	job->owner = owner;
	job->ctx = entity->fence_context;
	*f = fence_get(&job->base.s_fence->finished);
	amdgpu_job_free_resources(job);
	amd_sched_entity_push_job(&job->base);

	return 0;
}
Example #12
struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				   struct amdgpu_ring *ring, uint64_t seq)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	struct fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq >= cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}
Example #13
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
		      struct amd_sched_entity *entity, void *owner,
		      struct fence **f)
{
	struct fence *fence;
	int r;
	job->ring = ring;

	if (!f)
		return -EINVAL;

	r = amd_sched_job_init(&job->base, &ring->sched,
			       entity, amdgpu_job_timeout_func,
			       amdgpu_job_free_func, owner, &fence);
	if (r)
		return r;

	job->owner = owner;
	job->ctx = entity->fence_context;
	*f = fence_get(fence);
	amd_sched_entity_push_job(&job->base);

	return 0;
}
Example #14
/**
 * amdgpu_fence_emit - emit a fence on the requested ring
 *
 * @ring: ring the fence is associated with
 * @f: resulting fence object
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_fence *fence;
	struct fence *old, **ptr;
	uint32_t seq;

	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
	if (fence == NULL)
		return -ENOMEM;

	seq = ++ring->fence_drv.sync_seq;
	fence->ring = ring;
	fence_init(&fence->base, &amdgpu_fence_ops,
		   &ring->fence_drv.lock,
		   adev->fence_context + ring->idx,
		   seq);
	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       seq, AMDGPU_FENCE_FLAG_INT);

	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
	/* This function can't be called concurrently anyway, otherwise
	 * emitting the fence would mess up the hardware ring buffer.
	 */
	old = rcu_dereference_protected(*ptr, 1);
	if (old && !fence_is_signaled(old)) {
		DRM_INFO("rcu slot is busy\n");
		fence_wait(old, false);
	}

	rcu_assign_pointer(*ptr, fence_get(&fence->base));

	*f = &fence->base;

	return 0;
}
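
The slot-reuse rule above relies on the fence array length being a power of two, so (seq & mask) walks a ring of slots and a slot is only revisited after that many newer fences have been emitted; that is why the function waits when the old occupant has not signaled yet. A tiny demonstration of the masking in plain user-space C (the driver's num_fences_mask plays the role of mask here):

#include <stdio.h>

int main(void)
{
	const unsigned num_fences = 8;		/* must be a power of two */
	const unsigned mask = num_fences - 1;	/* like num_fences_mask above */
	unsigned seq;

	for (seq = 1; seq <= 20; ++seq)
		printf("seq %2u -> slot %u\n", seq, seq & mask);
	return 0;
}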
Example #15
File: sync_file.c Project: acton393/linux
/**
 * sync_file_merge() - merge two sync_files
 * @name:	name of new fence
 * @a:		sync_file a
 * @b:		sync_file b
 *
 * Creates a new sync_file which contains copies of all the fences in both
 * @a and @b.  @a and @b remain valid, independent sync_files. Returns the
 * new merged sync_file or NULL in case of error.
 */
static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
					 struct sync_file *b)
{
	struct sync_file *sync_file;
	struct fence **fences, **nfences, **a_fences, **b_fences;
	int i, i_a, i_b, num_fences, a_num_fences, b_num_fences;

	sync_file = sync_file_alloc();
	if (!sync_file)
		return NULL;

	a_fences = get_fences(a, &a_num_fences);
	b_fences = get_fences(b, &b_num_fences);
	if (a_num_fences > INT_MAX - b_num_fences)
		goto err;

	num_fences = a_num_fences + b_num_fences;

	fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto err;

	/*
	 * Assume sync_file a and b are both ordered and have no
	 * duplicates with the same context.
	 *
	 * If a sync_file can only be created with sync_file_merge
	 * and sync_file_create, this is a reasonable assumption.
	 */
	for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) {
		struct fence *pt_a = a_fences[i_a];
		struct fence *pt_b = b_fences[i_b];

		if (pt_a->context < pt_b->context) {
			add_fence(fences, &i, pt_a);

			i_a++;
		} else if (pt_a->context > pt_b->context) {
			add_fence(fences, &i, pt_b);

			i_b++;
		} else {
			if (pt_a->seqno - pt_b->seqno <= INT_MAX)
				add_fence(fences, &i, pt_a);
			else
				add_fence(fences, &i, pt_b);

			i_a++;
			i_b++;
		}
	}

	for (; i_a < a_num_fences; i_a++)
		add_fence(fences, &i, a_fences[i_a]);

	for (; i_b < b_num_fences; i_b++)
		add_fence(fences, &i, b_fences[i_b]);

	if (i == 0)
		fences[i++] = fence_get(a_fences[0]);

	if (num_fences > i) {
		nfences = krealloc(fences, i * sizeof(*fences),
				  GFP_KERNEL);
		if (!nfences)
			goto err;

		fences = nfences;
	}

	if (sync_file_set_fence(sync_file, fences, i) < 0) {
		kfree(fences);
		goto err;
	}

	strlcpy(sync_file->name, name, sizeof(sync_file->name));
	return sync_file;

err:
	fput(sync_file->file);
	return NULL;

}
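
sync_file_merge() backs the SYNC_IOC_MERGE ioctl, so from user space the merge is driven through the uapi header <linux/sync_file.h>. A hedged user-space sketch (example_merge_sync_files is a made-up helper; error handling trimmed to the essentials):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/sync_file.h>

static int example_merge_sync_files(int fd_a, int fd_b)
{
	struct sync_merge_data data;

	memset(&data, 0, sizeof(data));	/* flags/pad must be zero */
	strncpy(data.name, "merged", sizeof(data.name) - 1);
	data.fd2 = fd_b;

	if (ioctl(fd_a, SYNC_IOC_MERGE, &data) < 0)
		return -1;

	return data.fence;	/* fd of the new, merged sync_file */
}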
Example #16
File: amdgpu_ib.c Project: 513855417/linux
/**
 * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
 *
 * @ring: ring the IBs should be scheduled on
 * @num_ibs: number of IBs to schedule
 * @ibs: IB objects to schedule
 * @last_vm_update: fence of the last VM update
 * @job: job the IBs belong to (NULL for direct submissions such as ring tests)
 * @f: fence created during this submission
 *
 * Schedule an IB on the associated ring (all asics).
 * Returns 0 on success, error on failure.
 *
 * On SI, there are two parallel engines fed from the primary ring,
 * the CE (Constant Engine) and the DE (Drawing Engine).  Since
 * resource descriptors have moved to memory, the CE allows you to
 * prime the caches while the DE is updating register state so that
 * the resource descriptors will already be in cache when the draw is
 * processed.  To accomplish this, the userspace driver submits two
 * IBs, one for the CE and one for the DE.  If there is a CE IB (called
 * a CONST_IB), it will be put on the ring prior to the DE IB.  Prior
 * to SI there was just a DE IB.
 */
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
		       struct amdgpu_job *job, struct fence **f)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib *ib = &ibs[0];
	bool skip_preamble, need_ctx_switch;
	unsigned patch_offset = ~0;
	struct amdgpu_vm *vm;
	struct fence *hwf;
	uint64_t ctx;

	unsigned i;
	int r = 0;

	if (num_ibs == 0)
		return -EINVAL;

	/* ring tests don't use a job */
	if (job) {
		vm = job->vm;
		ctx = job->ctx;
	} else {
		vm = NULL;
		ctx = 0;
	}

	if (!ring->ready) {
		dev_err(adev->dev, "couldn't schedule ib\n");
		return -EINVAL;
	}

	if (vm && !job->vm_id) {
		dev_err(adev->dev, "VM IB without ID\n");
		return -EINVAL;
	}

	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
	if (r) {
		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
		return r;
	}

	if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (vm) {
		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
				    job->gds_base, job->gds_size,
				    job->gws_base, job->gws_size,
				    job->oa_base, job->oa_size);
		if (r) {
			amdgpu_ring_undo(ring);
			return r;
		}
	}

	if (ring->funcs->emit_hdp_flush)
		amdgpu_ring_emit_hdp_flush(ring);

	/* always set cond_exec_polling to CONTINUE */
	*ring->cond_exe_cpu_addr = 1;

	skip_preamble = ring->current_ctx == ctx;
	need_ctx_switch = ring->current_ctx != ctx;
	for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

		/* drop preamble IBs if we don't have a context switch */
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
			continue;

		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
				    need_ctx_switch);
		need_ctx_switch = false;
	}

	if (ring->funcs->emit_hdp_invalidate)
		amdgpu_ring_emit_hdp_invalidate(ring);

	r = amdgpu_fence_emit(ring, &hwf);
	if (r) {
		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
		if (job && job->vm_id)
			amdgpu_vm_reset_id(adev, job->vm_id);
		amdgpu_ring_undo(ring);
		return r;
	}

	/* wrap the last IB with fence */
	if (job && job->uf_bo) {
		uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);

		addr += job->uf_offset;
		amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
				       AMDGPU_FENCE_FLAG_64BIT);
	}

	if (f)
		*f = fence_get(hwf);

	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	ring->current_ctx = ctx;
	amdgpu_ring_commit(ring);
	return 0;
}
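
A caller-side sketch of the six-argument variant above (example_submit_and_wait is a hypothetical helper and assumes an already prepared struct amdgpu_ib): a direct submission such as a ring test passes job == NULL and last_vm_update == NULL, then waits on and releases the hardware fence returned through *f.

#include "amdgpu.h"	/* driver-internal header, as used by amdgpu_ib.c */

static int example_submit_and_wait(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib)
{
	struct fence *f = NULL;
	signed long t;
	int r;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
	if (r)
		return r;

	t = fence_wait_timeout(f, false, msecs_to_jiffies(2000));
	fence_put(f);		/* balances the fence_get() done for *f above */

	if (t == 0)
		return -ETIMEDOUT;
	return t < 0 ? t : 0;
}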
Example #17
/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	fence_get(&fence->base);
	return fence;
}
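
The reference taken here is normally paired with the driver's radeon_fence_unref(), which drops the reference and clears the pointer. A small sketch of that get/unref pattern (example_cache_fence is a hypothetical helper):

#include "radeon.h"	/* driver-internal header declaring the fence helpers */

static void example_cache_fence(struct radeon_fence **slot,
				struct radeon_fence *fence)
{
	/* drop whatever was cached before; unref also clears *slot */
	radeon_fence_unref(slot);

	if (fence)
		*slot = radeon_fence_ref(fence);
}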