int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file_priv)
{
	struct drm_vc4_create_shader_bo *args = data;
	struct vc4_bo *bo = NULL;
	int ret;

	if (args->size == 0)
		return -EINVAL;

	if (args->size % sizeof(u64) != 0)
		return -EINVAL;

	if (args->flags != 0) {
		DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
		return -EINVAL;
	}

	if (args->pad != 0) {
		DRM_INFO("Pad set: 0x%08x\n", args->pad);
		return -EINVAL;
	}

	bo = vc4_bo_create(dev, args->size, true);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	if (copy_from_user(bo->base.vaddr,
			   (void __user *)(uintptr_t)args->data,
			   args->size)) {
		ret = -EFAULT;
		goto fail;
	}

	/* Clear the rest of the memory from allocating from the BO
	 * cache.
	 */
	memset(bo->base.vaddr + args->size, 0,
	       bo->base.base.size - args->size);

	bo->validated_shader = vc4_validate_shader(&bo->base);
	if (!bo->validated_shader) {
		ret = -EINVAL;
		goto fail;
	}

	/* We have to create the handle after validation, to avoid
	 * races where users do things like mmap the shader BO.
	 */
	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);

fail:
	drm_gem_object_unreference_unlocked(&bo->base.base);

	return ret;
}
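/* For reference, a userspace sketch of driving this ioctl.  Hedged:
 * assumes the mainline uapi in <drm/vc4_drm.h> (struct
 * drm_vc4_create_shader_bo and DRM_IOCTL_VC4_CREATE_SHADER_BO); the
 * helper name is ours.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/vc4_drm.h>

/* Returns 0 and a GEM handle for a validated shader BO, or -1 on
 * error.  size must be a multiple of 8: one u64 per QPU instruction.
 */
static int vc4_create_shader(int drm_fd, const uint64_t *insts,
			     uint32_t size, uint32_t *handle)
{
	struct drm_vc4_create_shader_bo create;

	memset(&create, 0, sizeof(create));	/* flags and pad must be 0 */
	create.size = size;
	create.data = (uintptr_t)insts;

	if (ioctl(drm_fd, DRM_IOCTL_VC4_CREATE_SHADER_BO, &create) != 0)
		return -1;

	*handle = create.handle;
	return 0;
}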
static void vc4_overflow_mem_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, overflow_mem_work);
	struct drm_device *dev = vc4->dev;
	struct vc4_bo *bo;

	bo = vc4_bo_create(dev, 256 * 1024, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate binner overflow mem\n");
		return;
	}

	/* If there's a job executing currently, then our previous
	 * overflow allocation is getting used in that job and we need
	 * to queue it to be released when the job is done.  But if no
	 * job is executing at all, then we can free the old overflow
	 * object directly.
	 *
	 * No lock necessary for this pointer since we're the only
	 * ones that update the pointer, and our workqueue won't
	 * reenter.
	 */
	if (vc4->overflow_mem) {
		struct vc4_exec_info *current_exec;
		unsigned long irqflags;

		spin_lock_irqsave(&vc4->job_lock, irqflags);
		current_exec = vc4_first_bin_job(vc4);
		if (current_exec) {
			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
			list_add_tail(&vc4->overflow_mem->unref_head,
				      &current_exec->unref_list);
			vc4->overflow_mem = NULL;
		}
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}

	if (vc4->overflow_mem)
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
	vc4->overflow_mem = bo;

	V3D_WRITE(V3D_BPOA, bo->base.paddr);
	V3D_WRITE(V3D_BPOS, bo->base.base.size);
	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
}
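/* For context, a sketch of how this work item gets kicked: the V3D
 * interrupt handler sees the binner run out of memory, masks the
 * interrupt, and schedules the work above (which re-enables it via
 * V3D_INTENA once fresh overflow memory is pointed at the binner).
 * Hedged: the handler shape below is illustrative, not necessarily
 * the driver's exact code.
 */
static irqreturn_t vc4_irq(int irq, void *arg)
{
	struct drm_device *dev = arg;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t intctl = V3D_READ(V3D_INTCTL);
	irqreturn_t status = IRQ_NONE;

	if (intctl & V3D_INT_OUTOMEM) {
		/* Mask OUTOMEM until vc4_overflow_mem_work() has
		 * installed a new overflow BO.
		 */
		V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
		schedule_work(&vc4->overflow_mem_work);
		status = IRQ_HANDLED;
	}

	return status;
}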
int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	struct drm_vc4_create_bo *args = data;
	struct vc4_bo *bo = NULL;
	int ret;

	/*
	 * We can't allocate from the BO cache, because the BOs don't
	 * get zeroed, and that might leak data between users.
	 */
	bo = vc4_bo_create(dev, args->size, false);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
	drm_gem_object_unreference_unlocked(&bo->base.base);

	return ret;
}
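/* A userspace sketch pairing this ioctl with DRM_IOCTL_VC4_MMAP_BO
 * to get a CPU mapping.  Hedged: assumes the mainline uapi structs
 * in <drm/vc4_drm.h>; the helper name is ours.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/vc4_drm.h>

/* Creates a BO of the given size and maps it; returns MAP_FAILED on
 * failure.
 */
static void *vc4_create_and_map(int drm_fd, uint32_t size, uint32_t *handle)
{
	struct drm_vc4_create_bo create;
	struct drm_vc4_mmap_bo map;

	memset(&create, 0, sizeof(create));
	create.size = size;
	if (ioctl(drm_fd, DRM_IOCTL_VC4_CREATE_BO, &create) != 0)
		return MAP_FAILED;
	*handle = create.handle;

	memset(&map, 0, sizeof(map));
	map.handle = create.handle;
	if (ioctl(drm_fd, DRM_IOCTL_VC4_MMAP_BO, &map) != 0)
		return MAP_FAILED;

	/* map.offset is a fake mmap cookie into the DRM fd, not a
	 * physical address.
	 */
	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    drm_fd, map.offset);
}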
int vc4_dumb_create(struct drm_file *file_priv,
		    struct drm_device *dev,
		    struct drm_mode_create_dumb *args)
{
	int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
	struct vc4_bo *bo = NULL;
	int ret;

	if (args->pitch < min_pitch)
		args->pitch = min_pitch;

	if (args->size < args->pitch * args->height)
		args->size = args->pitch * args->height;

	bo = vc4_bo_create(dev, args->size, false);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
	drm_gem_object_unreference_unlocked(&bo->base.base);

	return ret;
}
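/* Worked example of the pitch/size math above: a 1920x1080 dumb
 * buffer at 32 bpp gets min_pitch = DIV_ROUND_UP(1920 * 32, 8) =
 * 7680 bytes, so size is rounded up to at least 7680 * 1080 =
 * 8294400 bytes (~7.9MB).
 */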
/**
 * vc4_allocate_bin_bo() - allocates the memory that will be used for
 * tile binning.
 *
 * The binner has a limitation that the addresses in the tile state
 * buffer that point into the tile alloc buffer or binner overflow
 * memory only have 28 bits (256MB), and the top 4 bits on the bus for
 * tile alloc references end up coming from the tile state buffer's
 * address.
 *
 * To work around this, we allocate a single large buffer while V3D is
 * in use, make sure that it has the top 4 bits constant across its
 * entire extent, and then put the tile state, tile alloc, and binner
 * overflow memory inside that buffer.
 *
 * This creates a limitation where we may not be able to execute a job
 * if it doesn't fit within the buffer that we allocated up front.
 * However, it turns out that 16MB is "enough for anybody", and
 * real-world applications run into allocation failures from the
 * overall CMA pool before they make scenes complicated enough to run
 * out of bin space.
 */
int vc4_allocate_bin_bo(struct drm_device *drm)
{
	struct vc4_dev *vc4 = to_vc4_dev(drm);
	struct vc4_v3d *v3d = vc4->v3d;
	uint32_t size = 16 * 1024 * 1024;
	int ret = 0;
	struct list_head list;

	/* We may need to try allocating more than once to get a BO
	 * that doesn't cross 256MB.  Track the ones we've allocated
	 * that failed so far, so that we can free them when we've got
	 * one that succeeded (if we freed them right away, our next
	 * allocation would probably be the same chunk of memory).
	 */
	INIT_LIST_HEAD(&list);

	while (true) {
		struct vc4_bo *bo = vc4_bo_create(drm, size, true,
						  VC4_BO_TYPE_BIN);

		if (IS_ERR(bo)) {
			ret = PTR_ERR(bo);

			dev_err(&v3d->pdev->dev,
				"Failed to allocate memory for tile binning: "
				"%d. You may need to enable CMA or give it "
				"more memory.",
				ret);
			break;
		}

		/* Check if this BO won't trigger the addressing bug. */
		if ((bo->base.paddr & 0xf0000000) ==
		    ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) {
			vc4->bin_bo = bo;

			/* Set up for allocating 512KB chunks of
			 * binner memory.  The biggest allocation we
			 * need to do is for the initial tile alloc +
			 * tile state buffer.  We can render to a
			 * maximum of ((2048*2048) / (32*32)) = 4096
			 * tiles in a frame (until we do floating
			 * point rendering, at which point it would be
			 * 8192).  Tile state is 48 bytes/tile (rounded to
			 * a page), and tile alloc is 32 bytes/tile
			 * (rounded to a page), plus a page of extra,
			 * for a total of 320KB for our worst-case.
			 * We choose 512KB so that it divides evenly
			 * into our 16MB, and the rest of the 512KB
			 * will be used as storage for the overflow
			 * from the initial 32b CL per bin.
			 */
			vc4->bin_alloc_size = 512 * 1024;
			vc4->bin_alloc_used = 0;
			vc4->bin_alloc_overflow = 0;
			WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
				     bo->base.base.size / vc4->bin_alloc_size);

			break;
		}

		/* Put it on the list to free later, and try again. */
		list_add(&bo->unref_head, &list);
	}

	/* Free all the BOs we allocated but didn't choose. */
	while (!list_empty(&list)) {
		struct vc4_bo *bo = list_last_entry(&list,
						    struct vc4_bo, unref_head);

		list_del(&bo->unref_head);
		drm_gem_object_put_unlocked(&bo->base.base);
	}

	return ret;
}
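/* Checking the arithmetic in the comment above: 4096 tiles * 48
 * bytes = 192KB of tile state and 4096 * 32 = 128KB of tile alloc,
 * each rounded to a page, plus one extra page, is roughly 324KB,
 * comfortably inside one 512KB chunk.  And 16MB / 512KB = 32 chunks,
 * which is why the WARN_ON_ONCE() expects bin_alloc_used to be a
 * 32-bit bitmap.
 */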
static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
			     struct vc4_rcl_setup *setup)
{
	struct drm_vc4_submit_cl *args = exec->args;
	bool has_bin = args->bin_cl_size != 0;
	uint8_t min_x_tile = args->min_x_tile;
	uint8_t min_y_tile = args->min_y_tile;
	uint8_t max_x_tile = args->max_x_tile;
	uint8_t max_y_tile = args->max_y_tile;
	uint8_t xtiles = max_x_tile - min_x_tile + 1;
	uint8_t ytiles = max_y_tile - min_y_tile + 1;
	uint8_t x, y;
	uint32_t size, loop_body_size;

	size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
	loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;

	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
		size += VC4_PACKET_CLEAR_COLORS_SIZE +
			VC4_PACKET_TILE_COORDINATES_SIZE +
			VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
	}

	if (setup->color_read) {
		if (args->color_read.flags &
		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
		} else {
			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
		}
	}
	if (setup->zs_read) {
		if (args->zs_read.flags &
		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
		} else {
			if (setup->color_read &&
			    !(args->color_read.flags &
			      VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
				loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
				loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
			}
			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
		}
	}

	if (has_bin) {
		size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
		loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
	}

	if (setup->msaa_color_write)
		loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
	if (setup->msaa_zs_write)
		loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;

	if (setup->zs_write)
		loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
	if (setup->color_write)
		loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;

	/* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
	loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *
		((setup->msaa_color_write != NULL) +
		 (setup->msaa_zs_write != NULL) +
		 (setup->color_write != NULL) +
		 (setup->zs_write != NULL) - 1);

	size += xtiles * ytiles * loop_body_size;

	setup->rcl = &vc4_bo_create(dev, size, true)->base;
	if (IS_ERR(setup->rcl))
		return PTR_ERR(setup->rcl);
	list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
		      &exec->unref_list);

	/* The tile buffer gets cleared when the previous tile is stored.  If
	 * the clear values changed between frames, then the tile buffer has
	 * stale clear values in it, so we have to do a store in None mode (no
	 * writes) so that we trigger the tile buffer clear.
	 */
	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
		rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
		rcl_u32(setup, args->clear_color[0]);
		rcl_u32(setup, args->clear_color[1]);
		rcl_u32(setup, args->clear_z);
		rcl_u8(setup, args->clear_s);

		vc4_tile_coordinates(setup, 0, 0);
		rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
		rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
		rcl_u32(setup, 0); /* no address, since we're in None mode */
	}

	rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
	rcl_u32(setup,
		(setup->color_write ?
		 (setup->color_write->paddr + args->color_write.offset) :
		 0));
	rcl_u16(setup, args->width);
	rcl_u16(setup, args->height);
	rcl_u16(setup, args->color_write.bits);

	for (y = min_y_tile; y <= max_y_tile; y++) {
		for (x = min_x_tile; x <= max_x_tile; x++) {
			bool first = (x == min_x_tile && y == min_y_tile);
			bool last = (x == max_x_tile && y == max_y_tile);

			emit_tile(exec, setup, x, y, first, last);
		}
	}

	BUG_ON(setup->next_offset != size);
	exec->ct1ca = setup->rcl->paddr;
	exec->ct1ea = setup->rcl->paddr + setup->next_offset;

	return 0;
}
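/* Worked example of the size computation above (derived from the
 * code, not from driver documentation): for a minimal job with only
 * a color write -- no binning, reads, clears, or MSAA surfaces --
 * loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE +
 * VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE (the trailing
 * TILE_COORDINATES term contributes zero, since there is only one
 * store), and size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE +
 * xtiles * ytiles * loop_body_size, which the BUG_ON() cross-checks
 * against setup->next_offset after emission.
 */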