Example #1
/**
 * amdgpu_gart_table_vram_pin - pin gart page table in vram
 *
 * @adev: amdgpu_device pointer
 *
 * Pin the GART page table in vram so it will not be moved
 * by the memory manager (pcie r4xx, r5xx+).  These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gart.robj, false);
	if (unlikely(r != 0))
		return r;
	r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM);
	if (r) {
		amdgpu_bo_unreserve(adev->gart.robj);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gart.robj, &adev->gart.ptr);
	if (r)
		amdgpu_bo_unpin(adev->gart.robj);
	amdgpu_bo_unreserve(adev->gart.robj);
	adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj);
	return r;
}
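A minimal caller sketch (not taken from the kernel tree, the wrapper name is hypothetical): pin the table once during GART/GMC hardware init and propagate any error.

/* Hypothetical caller: pin the GART table before programming the hardware. */
static int example_gmc_gart_enable(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	/* adev->gart.table_addr and adev->gart.ptr are now valid and can be
	 * handed to the ASIC-specific GART setup code. */
	return 0;
}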
Example #2
/**
 * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
 *
 * @adev: amdgpu_device pointer
 * @num_ibs: number of IBs to schedule
 * @ibs: IB objects to schedule
 * @owner: owner for creating the fences
 *
 * Schedule an IB on the associated ring (all asics).
 * Returns 0 on success, error on failure.
 *
 * On SI, there are two parallel engines fed from the primary ring,
 * the CE (Constant Engine) and the DE (Drawing Engine).  Since
 * resource descriptors have moved to memory, the CE allows you to
 * prime the caches while the DE is updating register state so that
 * the resource descriptors will be already in cache when the draw is
 * processed.  To accomplish this, the userspace driver submits two
 * IBs, one for the CE and one for the DE.  If there is a CE IB (called
 * a CONST_IB), it will be put on the ring prior to the DE IB.  Prior
 * to SI there was just a DE IB.
 */
int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
		       struct amdgpu_ib *ibs, void *owner)
{
	struct amdgpu_ib *ib = &ibs[0];
	struct amdgpu_ring *ring;
	struct amdgpu_ctx *ctx, *old_ctx;
	struct amdgpu_vm *vm;
	unsigned i;
	int r = 0;

	if (num_ibs == 0)
		return -EINVAL;

	ring = ibs->ring;
	ctx = ibs->ctx;
	vm = ibs->vm;

	if (!ring->ready) {
		dev_err(adev->dev, "couldn't schedule ib\n");
		return -EINVAL;
	}
	r = amdgpu_sync_wait(&ibs->sync);
	if (r) {
		dev_err(adev->dev, "IB sync failed (%d).\n", r);
		return r;
	}
	r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
	if (r) {
		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
		return r;
	}

	if (vm) {
		/* grab a vm id if necessary */
		r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync);
		if (r) {
			amdgpu_ring_unlock_undo(ring);
			return r;
		}
	}

	r = amdgpu_sync_rings(&ibs->sync, ring);
	if (r) {
		amdgpu_ring_unlock_undo(ring);
		dev_err(adev->dev, "failed to sync rings (%d)\n", r);
		return r;
	}

	if (vm) {
		/* do context switch */
		amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update);

		if (ring->funcs->emit_gds_switch)
			amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
						    ib->gds_base, ib->gds_size,
						    ib->gws_base, ib->gws_size,
						    ib->oa_base, ib->oa_size);

		if (ring->funcs->emit_hdp_flush)
			amdgpu_ring_emit_hdp_flush(ring);
	}

	old_ctx = ring->current_ctx;
	for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

		if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) {
			ring->current_ctx = old_ctx;
			amdgpu_ring_unlock_undo(ring);
			return -EINVAL;
		}
		amdgpu_ring_emit_ib(ring, ib);
		ring->current_ctx = ctx;
	}

	r = amdgpu_fence_emit(ring, owner, &ib->fence);
	if (r) {
		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
		ring->current_ctx = old_ctx;
		amdgpu_ring_unlock_undo(ring);
		return r;
	}

	if (!amdgpu_enable_scheduler && ib->ctx)
		ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
						    &ib->fence->base);

	/* wrap the last IB with fence */
	if (ib->user) {
		uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
		addr += ib->user->offset;
		amdgpu_ring_emit_fence(ring, addr, ib->sequence,
				       AMDGPU_FENCE_FLAG_64BIT);
	}

	if (ib->vm)
		amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);

	amdgpu_ring_unlock_commit(ring);
	return 0;
}
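A hedged usage sketch for the variant above, assuming the caller has already set up ib->ring, ib->ctx, ib->vm and ib->sync; only amdgpu_ib_schedule() itself is taken from the example, the helper is illustrative.

/* Hypothetical caller: submit a single, fully prepared IB. */
static int example_submit_one_ib(struct amdgpu_device *adev,
				 struct amdgpu_ib *ib, void *owner)
{
	int r;

	r = amdgpu_ib_schedule(adev, 1, ib, owner);
	if (r)
		dev_err(adev->dev, "IB submission failed (%d)\n", r);
	return r;
}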
Example #3
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool mqd_gfx9)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;

	if (mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}
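A hedged caller sketch: allocate one page of CPU-visible GTT memory, e.g. for an MQD. Only amdgpu_amdkfd_alloc_gtt_mem() is taken from the example; the wrapper and its name are illustrative.

/* Hypothetical wrapper: allocate one page of GTT memory for KFD use. */
static int example_alloc_kfd_page(struct kgd_dev *kgd, void **mem_obj,
				  uint64_t *gpu_addr, void **cpu_ptr)
{
	/* pass true as the last argument only for GFX9 MQDs, which need
	 * the special MQD placement flag */
	return amdgpu_amdkfd_alloc_gtt_mem(kgd, PAGE_SIZE, mem_obj,
					   gpu_addr, cpu_ptr, false);
}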
Example #4
static int amdgpufb_create(struct drm_fb_helper *helper,
			   struct drm_fb_helper_surface_size *sizes)
{
	struct amdgpu_fbdev *rfbdev = (struct amdgpu_fbdev *)helper;
	struct amdgpu_device *adev = rfbdev->adev;
	struct fb_info *info;
	struct drm_framebuffer *fb = NULL;
	struct drm_mode_fb_cmd2 mode_cmd;
	struct drm_gem_object *gobj = NULL;
	struct amdgpu_bo *abo = NULL;
	int ret;
	unsigned long tmp;

	mode_cmd.width = sizes->surface_width;
	mode_cmd.height = sizes->surface_height;

	if (sizes->surface_bpp == 24)
		sizes->surface_bpp = 32;

	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
							  sizes->surface_depth);

	ret = amdgpufb_create_pinned_object(rfbdev, &mode_cmd, &gobj);
	if (ret) {
		DRM_ERROR("failed to create fbcon object %d\n", ret);
		return ret;
	}

	abo = gem_to_amdgpu_bo(gobj);

	/* okay we have an object now allocate the framebuffer */
	info = drm_fb_helper_alloc_fbi(helper);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		goto out;
	}

	info->par = rfbdev;
	info->skip_vt_switch = true;

	ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
	if (ret) {
		DRM_ERROR("failed to initialize framebuffer %d\n", ret);
		goto out;
	}

	fb = &rfbdev->rfb.base;

	/* setup helper */
	rfbdev->helper.fb = fb;

	strcpy(info->fix.id, "amdgpudrmfb");

	drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth);

	info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT;
	info->fbops = &amdgpufb_ops;

	tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start;
	info->fix.smem_start = adev->mc.aper_base + tmp;
	info->fix.smem_len = amdgpu_bo_size(abo);
	info->screen_base = abo->kptr;
	info->screen_size = amdgpu_bo_size(abo);

	drm_fb_helper_fill_var(info, &rfbdev->helper, sizes->fb_width, sizes->fb_height);

	/* setup aperture base/size for vesafb takeover */
	info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base;
	info->apertures->ranges[0].size = adev->mc.aper_size;

	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */

	if (info->screen_base == NULL) {
		ret = -ENOSPC;
		goto out;
	}

	DRM_INFO("fb mappable at 0x%lX\n",  info->fix.smem_start);
	DRM_INFO("vram apper at 0x%lX\n",  (unsigned long)adev->mc.aper_base);
	DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo));
	DRM_INFO("fb depth is %d\n", fb->format->depth);
	DRM_INFO("   pitch is %d\n", fb->pitches[0]);

	vga_switcheroo_client_fb_set(adev->ddev->pdev, info);
	return 0;

out:
	if (fb && ret) {
		drm_gem_object_unreference_unlocked(gobj);
		drm_framebuffer_unregister_private(fb);
		drm_framebuffer_cleanup(fb);
		kfree(fb);
	}
	return ret;
}
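For context, amdgpufb_create() is used as a DRM fb-helper probe callback; a sketch of how it is typically wired up (the struct instance name here is illustrative, while the .fb_probe member belongs to the drm_fb_helper_funcs API of the same era as the code above):

static const struct drm_fb_helper_funcs example_fb_helper_funcs = {
	.fb_probe = amdgpufb_create,
};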
Example #5
/**
 * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
 *
 * @ring: ring the IBs are scheduled on
 * @num_ibs: number of IBs to schedule
 * @ibs: IB objects to schedule
 * @last_vm_update: fence of the last VM update
 * @job: job the IBs belong to, or NULL (e.g. for ring tests)
 * @f: fence created during this submission
 *
 * Schedule an IB on the associated ring (all asics).
 * Returns 0 on success, error on failure.
 *
 * On SI, there are two parallel engines fed from the primary ring,
 * the CE (Constant Engine) and the DE (Drawing Engine).  Since
 * resource descriptors have moved to memory, the CE allows you to
 * prime the caches while the DE is updating register state so that
 * the resource descriptors will be already in cache when the draw is
 * processed.  To accomplish this, the userspace driver submits two
 * IBs, one for the CE and one for the DE.  If there is a CE IB (called
 * a CONST_IB), it will be put on the ring prior to the DE IB.  Prior
 * to SI there was just a DE IB.
 */
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
		       struct amdgpu_job *job, struct fence **f)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib *ib = &ibs[0];
	bool skip_preamble, need_ctx_switch;
	unsigned patch_offset = ~0;
	struct amdgpu_vm *vm;
	struct fence *hwf;
	uint64_t ctx;

	unsigned i;
	int r = 0;

	if (num_ibs == 0)
		return -EINVAL;

	/* ring tests don't use a job */
	if (job) {
		vm = job->vm;
		ctx = job->ctx;
	} else {
		vm = NULL;
		ctx = 0;
	}

	if (!ring->ready) {
		dev_err(adev->dev, "couldn't schedule ib\n");
		return -EINVAL;
	}

	if (vm && !job->vm_id) {
		dev_err(adev->dev, "VM IB without ID\n");
		return -EINVAL;
	}

	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
	if (r) {
		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
		return r;
	}

	if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (vm) {
		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
				    job->gds_base, job->gds_size,
				    job->gws_base, job->gws_size,
				    job->oa_base, job->oa_size);
		if (r) {
			amdgpu_ring_undo(ring);
			return r;
		}
	}

	if (ring->funcs->emit_hdp_flush)
		amdgpu_ring_emit_hdp_flush(ring);

	/* always set cond_exec_polling to CONTINUE */
	*ring->cond_exe_cpu_addr = 1;

	skip_preamble = ring->current_ctx == ctx;
	need_ctx_switch = ring->current_ctx != ctx;
	for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

		/* drop preamble IBs if we don't have a context switch */
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
			continue;

		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
				    need_ctx_switch);
		need_ctx_switch = false;
	}

	if (ring->funcs->emit_hdp_invalidate)
		amdgpu_ring_emit_hdp_invalidate(ring);

	r = amdgpu_fence_emit(ring, &hwf);
	if (r) {
		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
		if (job && job->vm_id)
			amdgpu_vm_reset_id(adev, job->vm_id);
		amdgpu_ring_undo(ring);
		return r;
	}

	/* wrap the last IB with fence */
	if (job && job->uf_bo) {
		uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);

		addr += job->uf_offset;
		amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
				       AMDGPU_FENCE_FLAG_64BIT);
	}

	if (f)
		*f = fence_get(hwf);

	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	ring->current_ctx = ctx;
	amdgpu_ring_commit(ring);
	return 0;
}
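A hedged caller sketch in the style of a ring test (no job, so no VM flush is emitted); only amdgpu_ib_schedule() is taken from the example above, the surrounding helper is hypothetical.

/* Hypothetical caller: submit one IB without a job and keep the HW fence. */
static int example_ring_test_submit(struct amdgpu_ring *ring,
				    struct amdgpu_ib *ib)
{
	struct fence *f = NULL;
	int r;

	/* job == NULL: ctx is 0 and no VM flush is performed */
	r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
	if (r)
		return r;

	/* ... wait on f here if needed, then drop the reference ... */
	fence_put(f);
	return 0;
}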