Beispiel #1
0
int
vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
			   struct drm_file *file_priv)
{
	struct drm_vc4_create_shader_bo *args = data;
	struct vc4_bo *bo = NULL;
	int ret;

	if (args->size == 0)
		return -EINVAL;

	if (args->size % sizeof(u64) != 0)
		return -EINVAL;

	if (args->flags != 0) {
		DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
		return -EINVAL;
	}

	if (args->pad != 0) {
		DRM_INFO("Pad set: 0x%08x\n", args->pad);
		return -EINVAL;
	}

	bo = vc4_bo_create(dev, args->size, true);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	if (copy_from_user(bo->base.vaddr,
			     (void __user *)(uintptr_t)args->data,
			     args->size)) {
		ret = -EFAULT;
		goto fail;
	}
	/* Clear the rest of the memory from allocating from the BO
	 * cache.
	 */
	memset(bo->base.vaddr + args->size, 0,
	       bo->base.base.size - args->size);

	bo->validated_shader = vc4_validate_shader(&bo->base);
	if (!bo->validated_shader) {
		ret = -EINVAL;
		goto fail;
	}

	/* We have to create the handle after validation, to avoid
	 * races for users to do doing things like mmap the shader BO.
	 */
	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);

 fail:
	drm_gem_object_unreference_unlocked(&bo->base.base);

	return ret;
}
Beispiel #2
0
static void
vc4_overflow_mem_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, overflow_mem_work);
	struct drm_device *dev = vc4->dev;
	struct vc4_bo *bo;

	bo = vc4_bo_create(dev, 256 * 1024, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate binner overflow mem\n");
		return;
	}

	/* If there's a job executing currently, then our previous
	 * overflow allocation is getting used in that job and we need
	 * to queue it to be released when the job is done.  But if no
	 * job is executing at all, then we can free the old overflow
	 * object direcctly.
	 *
	 * No lock necessary for this pointer since we're the only
	 * ones that update the pointer, and our workqueue won't
	 * reenter.
	 */
	if (vc4->overflow_mem) {
		struct vc4_exec_info *current_exec;
		unsigned long irqflags;

		spin_lock_irqsave(&vc4->job_lock, irqflags);
		current_exec = vc4_first_bin_job(vc4);
		if (current_exec) {
			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
			list_add_tail(&vc4->overflow_mem->unref_head,
				      &current_exec->unref_list);
			vc4->overflow_mem = NULL;
		}
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}

	if (vc4->overflow_mem)
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
	vc4->overflow_mem = bo;

	V3D_WRITE(V3D_BPOA, bo->base.paddr);
	V3D_WRITE(V3D_BPOS, bo->base.base.size);
	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
}
Beispiel #3
0
int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	struct drm_vc4_create_bo *args = data;
	struct vc4_bo *bo = NULL;
	int ret;

	/*
	 * We can't allocate from the BO cache, because the BOs don't
	 * get zeroed, and that might leak data between users.
	 */
	bo = vc4_bo_create(dev, args->size, false);
	if (!bo)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
	drm_gem_object_unreference_unlocked(&bo->base.base);

	return ret;
}
Beispiel #4
0
int vc4_dumb_create(struct drm_file *file_priv,
		    struct drm_device *dev,
		    struct drm_mode_create_dumb *args)
{
	int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
	struct vc4_bo *bo = NULL;
	int ret;

	if (args->pitch < min_pitch)
		args->pitch = min_pitch;

	if (args->size < args->pitch * args->height)
		args->size = args->pitch * args->height;

	bo = vc4_bo_create(dev, args->size, false);
	if (!bo)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
	drm_gem_object_unreference_unlocked(&bo->base.base);

	return ret;
}
Beispiel #5
0
/**
 * vc4_allocate_bin_bo() - allocates the memory that will be used for
 * tile binning.
 *
 * The binner has a limitation that the addresses in the tile state
 * buffer that point into the tile alloc buffer or binner overflow
 * memory only have 28 bits (256MB), and the top 4 on the bus for
 * tile alloc references end up coming from the tile state buffer's
 * address.
 *
 * To work around this, we allocate a single large buffer while V3D is
 * in use, make sure that it has the top 4 bits constant across its
 * entire extent, and then put the tile state, tile alloc, and binner
 * overflow memory inside that buffer.
 *
 * This creates a limitation where we may not be able to execute a job
 * if it doesn't fit within the buffer that we allocated up front.
 * However, it turns out that 16MB is "enough for anybody", and
 * real-world applications run into allocation failures from the
 * overall CMA pool before they make scenes complicated enough to run
 * out of bin space.
 */
int
vc4_allocate_bin_bo(struct drm_device *drm)
{
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_v3d *v3d = vc4->v3d;
	uint32_t size = 16 * 1024 * 1024;
	int ret = 0;
	struct list_head list;

	/* We may need to try allocating more than once to get a BO
	 * that doesn't cross 256MB.  Track the ones we've allocated
	 * that failed so far, so that we can free them when we've got
	 * one that succeeded (if we freed them right away, our next
	 * allocation would probably be the same chunk of memory).
	 */
	INIT_LIST_HEAD(&list);

	while (true) {
		struct vc4_bo *bo = vc4_bo_create(drm, size, true,
						  VC4_BO_TYPE_BIN);

		if (IS_ERR(bo)) {
			ret = PTR_ERR(bo);

			dev_err(&v3d->pdev->dev,
				"Failed to allocate memory for tile binning: "
				"%d. You may need to enable CMA or give it "
				"more memory.",
				ret);
			break;
		}

		/* Check if this BO won't trigger the addressing bug. */
		if ((bo->base.paddr & 0xf0000000) ==
		    ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) {
			vc4->bin_bo = bo;

			/* Set up for allocating 512KB chunks of
			 * binner memory.  The biggest allocation we
			 * need to do is for the initial tile alloc +
			 * tile state buffer.  We can render to a
			 * maximum of ((2048*2048) / (32*32) = 4096
			 * tiles in a frame (until we do floating
			 * point rendering, at which point it would be
			 * 8192).  Tile state is 48b/tile (rounded to
			 * a page), and tile alloc is 32b/tile
			 * (rounded to a page), plus a page of extra,
			 * for a total of 320kb for our worst-case.
			 * We choose 512kb so that it divides evenly
			 * into our 16MB, and the rest of the 512kb
			 * will be used as storage for the overflow
			 * from the initial 32b CL per bin.
			 */
			vc4->bin_alloc_size = 512 * 1024;
			vc4->bin_alloc_used = 0;
			vc4->bin_alloc_overflow = 0;
			WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
				     bo->base.base.size / vc4->bin_alloc_size);

			break;
		}

		/* Put it on the list to free later, and try again. */
		list_add(&bo->unref_head, &list);
	}

	/* Free all the BOs we allocated but didn't choose. */
	while (!list_empty(&list)) {
		struct vc4_bo *bo = list_last_entry(&list,
						    struct vc4_bo, unref_head);

		list_del(&bo->unref_head);
		drm_gem_object_put_unlocked(&bo->base.base);
	}

	return ret;
}
Beispiel #6
0
static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
			     struct vc4_rcl_setup *setup)
{
	struct drm_vc4_submit_cl *args = exec->args;
	bool has_bin = args->bin_cl_size != 0;
	uint8_t min_x_tile = args->min_x_tile;
	uint8_t min_y_tile = args->min_y_tile;
	uint8_t max_x_tile = args->max_x_tile;
	uint8_t max_y_tile = args->max_y_tile;
	uint8_t xtiles = max_x_tile - min_x_tile + 1;
	uint8_t ytiles = max_y_tile - min_y_tile + 1;
	uint8_t x, y;
	uint32_t size, loop_body_size;

	size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
	loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;

	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
		size += VC4_PACKET_CLEAR_COLORS_SIZE +
			VC4_PACKET_TILE_COORDINATES_SIZE +
			VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
	}

	if (setup->color_read) {
		if (args->color_read.flags &
		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
		} else {
			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
		}
	}
	if (setup->zs_read) {
		if (args->zs_read.flags &
		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
		} else {
			if (setup->color_read &&
			    !(args->color_read.flags &
			      VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
				loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
				loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
			}
			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
		}
	}

	if (has_bin) {
		size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
		loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
	}

	if (setup->msaa_color_write)
		loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
	if (setup->msaa_zs_write)
		loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;

	if (setup->zs_write)
		loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
	if (setup->color_write)
		loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;

	/* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
	loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *
		((setup->msaa_color_write != NULL) +
		 (setup->msaa_zs_write != NULL) +
		 (setup->color_write != NULL) +
		 (setup->zs_write != NULL) - 1);

	size += xtiles * ytiles * loop_body_size;

	setup->rcl = &vc4_bo_create(dev, size, true)->base;
	if (IS_ERR(setup->rcl))
		return PTR_ERR(setup->rcl);
	list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
		      &exec->unref_list);

	/* The tile buffer gets cleared when the previous tile is stored.  If
	 * the clear values changed between frames, then the tile buffer has
	 * stale clear values in it, so we have to do a store in None mode (no
	 * writes) so that we trigger the tile buffer clear.
	 */
	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
		rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
		rcl_u32(setup, args->clear_color[0]);
		rcl_u32(setup, args->clear_color[1]);
		rcl_u32(setup, args->clear_z);
		rcl_u8(setup, args->clear_s);

		vc4_tile_coordinates(setup, 0, 0);

		rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
		rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
		rcl_u32(setup, 0); /* no address, since we're in None mode */
	}

	rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
	rcl_u32(setup,
		(setup->color_write ? (setup->color_write->paddr +
				       args->color_write.offset) :
		 0));
	rcl_u16(setup, args->width);
	rcl_u16(setup, args->height);
	rcl_u16(setup, args->color_write.bits);

	for (y = min_y_tile; y <= max_y_tile; y++) {
		for (x = min_x_tile; x <= max_x_tile; x++) {
			bool first = (x == min_x_tile && y == min_y_tile);
			bool last = (x == max_x_tile && y == max_y_tile);

			emit_tile(exec, setup, x, y, first, last);
		}
	}

	BUG_ON(setup->next_offset != size);
	exec->ct1ca = setup->rcl->paddr;
	exec->ct1ea = setup->rcl->paddr + setup->next_offset;

	return 0;
}