static void vc4_bo_cache_time_timer(unsigned long data) { struct drm_device *dev = (struct drm_device *)data; struct vc4_dev *vc4 = to_vc4_dev(dev); schedule_work(&vc4->bo_cache.time_work); }
static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, uint32_t size) { struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t page_index = bo_page_index(size); struct vc4_bo *bo = NULL; size = roundup(size, PAGE_SIZE); mutex_lock(&vc4->bo_lock); if (page_index >= vc4->bo_cache.size_list_size) goto out; if (list_empty(&vc4->bo_cache.size_list[page_index])) goto out; bo = list_first_entry(&vc4->bo_cache.size_list[page_index], struct vc4_bo, size_head); vc4_bo_remove_from_cache(bo); kref_init(&bo->base.base.refcount); out: mutex_unlock(&vc4->bo_lock); return bo; }
int vc4_bo_stats_debugfs(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_bo_stats stats; /* Take a snapshot of the current stats with the lock held. */ mutex_lock(&vc4->bo_lock); stats = vc4->bo_stats; mutex_unlock(&vc4->bo_lock); seq_printf(m, "num bos allocated: %d\n", stats.num_allocated); seq_printf(m, "size bos allocated: %dkb\n", stats.size_allocated / 1024); seq_printf(m, "num bos used: %d\n", stats.num_allocated - stats.num_cached); seq_printf(m, "size bos used: %dkb\n", (stats.size_allocated - stats.size_cached) / 1024); seq_printf(m, "num bos cached: %d\n", stats.num_cached); seq_printf(m, "size bos cached: %dkb\n", stats.size_cached / 1024); return 0; }
static bool vc4_fence_signaled(struct dma_fence *fence) { struct vc4_fence *f = to_vc4_fence(fence); struct vc4_dev *vc4 = to_vc4_dev(f->dev); return vc4->finished_seqno >= f->seqno; }
int vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); return 0; }
static void vc4_v3d_init_hw(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); /* Take all the memory that would have been reserved for user * QPU programs, since we don't have an interface for running * them, anyway. */ V3D_WRITE(V3D_VPMBASE, 0); }
/* Must be called with bo_lock held. */ static void vc4_bo_remove_from_cache(struct vc4_bo *bo) { struct drm_gem_object *obj = &bo->base.base; struct vc4_dev *vc4 = to_vc4_dev(obj->dev); vc4->bo_stats.num_cached--; vc4->bo_stats.size_cached -= obj->size; list_del(&bo->unref_head); list_del(&bo->size_head); }
/* Called on the last userspace/kernel unreference of the BO. Returns * it to the BO cache if possible, otherwise frees it. */ void vc4_free_object(struct drm_gem_object *gem_bo) { struct drm_device *dev = gem_bo->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_bo *bo = to_vc4_bo(gem_bo); struct list_head *cache_list; mutex_lock(&vc4->bo_lock); /* If the object references someone else's memory, we can't cache it. */ if (gem_bo->import_attach) { vc4_bo_destroy(bo); goto out; } /* Don't cache if it was publicly named. */ if (gem_bo->name) { vc4_bo_destroy(bo); goto out; } /* If this object was partially constructed but CMA allocation * had failed, just free it. */ if (!bo->base.vaddr) { vc4_bo_destroy(bo); goto out; } cache_list = vc4_get_cache_list_for_size(dev, gem_bo->size); if (!cache_list) { vc4_bo_destroy(bo); goto out; } if (bo->validated_shader) { kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; } bo->free_time = jiffies; list_add(&bo->size_head, cache_list); list_add(&bo->unref_head, &vc4->bo_cache.time_list); vc4->bo_stats.num_cached++; vc4->bo_stats.size_cached += gem_bo->size; vc4_bo_cache_free_old(dev); out: mutex_unlock(&vc4->bo_lock); }
static void vc4_cancel_bin_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *exec = vc4_first_bin_job(vc4); if (!exec) return; list_move_tail(&exec->head, &vc4->bin_job_list); vc4_submit_next_bin_job(dev); }
static void vc4_irq_finish_bin_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *exec = vc4_first_bin_job(vc4); if (!exec) return; vc4_move_job_to_render(dev, exec); vc4_submit_next_bin_job(dev); }
void vc4_irq_preinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); init_waitqueue_head(&vc4->job_wait_queue); INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work); /* Clear any pending interrupts someone might have left around * for us. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); }
void vc4_bo_cache_purge(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); mutex_lock(&vc4->bo_lock); while (!list_empty(&vc4->bo_cache.time_list)) { struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list, struct vc4_bo, unref_head); vc4_bo_remove_from_cache(bo); vc4_bo_destroy(bo); } mutex_unlock(&vc4->bo_lock); }
void vc4_bo_cache_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); mutex_init(&vc4->bo_lock); INIT_LIST_HEAD(&vc4->bo_cache.time_list); INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work); setup_timer(&vc4->bo_cache.time_timer, vc4_bo_cache_time_timer, (unsigned long)dev); }
void vc4_irq_uninstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); /* Disable sending interrupts for our driver's IRQs. */ V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS); /* Clear any pending interrupts we might have left. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); cancel_work_sync(&vc4->overflow_mem_work); }
static int vc4_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_get_param *args = data; int ret; if (args->pad != 0) return -EINVAL; switch (args->param) { case DRM_VC4_PARAM_V3D_IDENT0: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT0); pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT1: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT1); pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT2: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT2); pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_SUPPORTS_BRANCHES: case DRM_VC4_PARAM_SUPPORTS_ETC1: case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: case DRM_VC4_PARAM_SUPPORTS_MADVISE: case DRM_VC4_PARAM_SUPPORTS_PERFMON: args->value = true; break; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; } return 0; }
void vc4_bo_cache_destroy(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); del_timer(&vc4->bo_cache.time_timer); cancel_work_sync(&vc4->bo_cache.time_work); vc4_bo_cache_purge(dev); if (vc4->bo_stats.num_allocated) { DRM_ERROR("Destroying BO cache while BOs still allocated:\n"); vc4_bo_stats_dump(vc4); } }
int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); int i; for (i = 0; i < ARRAY_SIZE(vc4_reg_defs); i++) { seq_printf(m, "%s (0x%04x): 0x%08x\n", vc4_reg_defs[i].name, vc4_reg_defs[i].reg, V3D_READ(vc4_reg_defs[i].reg)); } return 0; }
/* Must be called with bo_lock held. */ static void vc4_bo_destroy(struct vc4_bo *bo) { struct drm_gem_object *obj = &bo->base.base; struct vc4_dev *vc4 = to_vc4_dev(obj->dev); if (bo->validated_shader) { kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; } vc4->bo_stats.num_allocated--; vc4->bo_stats.size_allocated -= obj->size; drm_gem_cma_free_object(obj); }
/** * vc4_gem_create_object - Implementation of driver->gem_create_object. * * This lets the CMA helpers allocate object structs for us, and keep * our BO stats correct. */ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_bo *bo; bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) return ERR_PTR(-ENOMEM); mutex_lock(&vc4->bo_lock); vc4->bo_stats.num_allocated++; vc4->bo_stats.size_allocated += size; mutex_unlock(&vc4->bo_lock); return &bo->base.base; }
static void vc4_irq_finish_render_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *exec = vc4_first_render_job(vc4); if (!exec) return; vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list); vc4_submit_next_render_job(dev); wake_up_all(&vc4->job_wait_queue); schedule_work(&vc4->job_done_work); }
static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_v3d *v3d = NULL; int ret; v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL); if (!v3d) return -ENOMEM; dev_set_drvdata(dev, v3d); v3d->pdev = pdev; v3d->regs = vc4_ioremap_regs(pdev, 0); if (IS_ERR(v3d->regs)) return PTR_ERR(v3d->regs); vc4->v3d = v3d; v3d->vc4 = vc4; if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); return -EINVAL; } /* Reset the binner overflow address/size at setup, to be sure * we don't reuse an old one. */ V3D_WRITE(V3D_BPOA, 0); V3D_WRITE(V3D_BPOS, 0); vc4_v3d_init_hw(drm); ret = drm_irq_install(drm, platform_get_irq(pdev, 0)); if (ret) { DRM_ERROR("Failed to install IRQ handler\n"); return ret; } pm_runtime_enable(dev); return 0; }
static void vc4_v3d_unbind(struct device *dev, struct device *master, void *data) { struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = to_vc4_dev(drm); pm_runtime_disable(dev); drm_irq_uninstall(drm); /* Disable the binner's overflow memory address, so the next * driver probe (if any) doesn't try to reuse our old * allocation. */ V3D_WRITE(V3D_BPOA, 0); V3D_WRITE(V3D_BPOS, 0); vc4->v3d = NULL; }
/* Must be called with bo_lock held. */ static void vc4_bo_cache_free_old(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned long expire_time = jiffies - msecs_to_jiffies(1000); while (!list_empty(&vc4->bo_cache.time_list)) { struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list, struct vc4_bo, unref_head); if (time_before(expire_time, bo->free_time)) { mod_timer(&vc4->bo_cache.time_timer, round_jiffies_up(jiffies + msecs_to_jiffies(1000))); return; } vc4_bo_remove_from_cache(bo); vc4_bo_destroy(bo); } }
int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t ident1 = V3D_READ(V3D_IDENT1); uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC); uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS); uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS); seq_printf(m, "Revision: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_REV)); seq_printf(m, "Slices: %d\n", nslc); seq_printf(m, "TMUs: %d\n", nslc * tups); seq_printf(m, "QPUs: %d\n", nslc * qups); seq_printf(m, "Semaphores: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM)); return 0; }
irqreturn_t vc4_irq(int irq, void *arg) { struct drm_device *dev = arg; struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t intctl; irqreturn_t status = IRQ_NONE; barrier(); intctl = V3D_READ(V3D_INTCTL); /* Acknowledge the interrupts we're handling here. The binner * last flush / render frame done interrupt will be cleared, * while OUTOMEM will stay high until the underlying cause is * cleared. */ V3D_WRITE(V3D_INTCTL, intctl); if (intctl & V3D_INT_OUTOMEM) { /* Disable OUTOMEM until the work is done. */ V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM); schedule_work(&vc4->overflow_mem_work); status = IRQ_HANDLED; } if (intctl & V3D_INT_FLDONE) { spin_lock(&vc4->job_lock); vc4_irq_finish_bin_job(dev); spin_unlock(&vc4->job_lock); status = IRQ_HANDLED; } if (intctl & V3D_INT_FRDONE) { spin_lock(&vc4->job_lock); vc4_irq_finish_render_job(dev); spin_unlock(&vc4->job_lock); status = IRQ_HANDLED; } return status; }
/** Reinitializes interrupt registers when a GPU reset is performed. */ void vc4_irq_reset(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); unsigned long irqflags; /* Acknowledge any stale IRQs. */ V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); /* * Turn all our interrupts on. Binner out of memory is the * only one we expect to trigger at this point, since we've * just come from poweron and haven't supplied any overflow * memory yet. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); spin_lock_irqsave(&vc4->job_lock, irqflags); vc4_cancel_bin_job(dev); vc4_irq_finish_render_job(dev); spin_unlock_irqrestore(&vc4->job_lock, irqflags); }
void vc4_hvs_dump_state(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); int i; for (i = 0; i < ARRAY_SIZE(hvs_regs); i++) { DRM_INFO("0x%04x (%s): 0x%08x\n", hvs_regs[i].reg, hvs_regs[i].name, HVS_READ(hvs_regs[i].reg)); } DRM_INFO("HVS ctx:\n"); for (i = 0; i < 64; i += 4) { DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n", i * 4, i < HVS_BOOTLOADER_DLIST_END ? "B" : "D", readl((u32 __iomem *)vc4->hvs->dlist + i + 0), readl((u32 __iomem *)vc4->hvs->dlist + i + 1), readl((u32 __iomem *)vc4->hvs->dlist + i + 2), readl((u32 __iomem *)vc4->hvs->dlist + i + 3)); } }
static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev, size_t size) { struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t page_index = bo_page_index(size); if (vc4->bo_cache.size_list_size <= page_index) { uint32_t new_size = max(vc4->bo_cache.size_list_size * 2, page_index + 1); struct list_head *new_list; uint32_t i; new_list = kmalloc_array(new_size, sizeof(struct list_head), GFP_KERNEL); if (!new_list) return NULL; /* Rebase the old cached BO lists to their new list * head locations. */ for (i = 0; i < vc4->bo_cache.size_list_size; i++) { struct list_head *old_list = &vc4->bo_cache.size_list[i]; if (list_empty(old_list)) INIT_LIST_HEAD(&new_list[i]); else list_replace(old_list, &new_list[i]); } /* And initialize the brand new BO list heads. */ for (i = vc4->bo_cache.size_list_size; i < new_size; i++) INIT_LIST_HEAD(&new_list[i]); kfree(vc4->bo_cache.size_list); vc4->bo_cache.size_list = new_list; vc4->bo_cache.size_list_size = new_size; } return &vc4->bo_cache.size_list[page_index]; }
struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, bool allow_unzeroed) { size_t size = roundup(unaligned_size, PAGE_SIZE); struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_gem_cma_object *cma_obj; struct vc4_bo *bo; if (size == 0) return ERR_PTR(-EINVAL); /* First, try to get a vc4_bo from the kernel BO cache. */ bo = vc4_bo_get_from_cache(dev, size); if (bo) { if (!allow_unzeroed) memset(bo->base.vaddr, 0, bo->base.base.size); return bo; } cma_obj = drm_gem_cma_create(dev, size); if (IS_ERR(cma_obj)) { /* * If we've run out of CMA memory, kill the cache of * CMA allocations we've got laying around and try again. */ vc4_bo_cache_purge(dev); cma_obj = drm_gem_cma_create(dev, size); if (IS_ERR(cma_obj)) { DRM_ERROR("Failed to allocate from CMA:\n"); vc4_bo_stats_dump(vc4); return ERR_PTR(-ENOMEM); } } return to_vc4_bo(&cma_obj->base); }
static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_v3d *v3d = NULL; int ret; v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL); if (!v3d) return -ENOMEM; dev_set_drvdata(dev, v3d); v3d->pdev = pdev; v3d->regs = vc4_ioremap_regs(pdev, 0); if (IS_ERR(v3d->regs)) return PTR_ERR(v3d->regs); vc4->v3d = v3d; v3d->vc4 = vc4; v3d->clk = devm_clk_get(dev, NULL); if (IS_ERR(v3d->clk)) { int ret = PTR_ERR(v3d->clk); if (ret == -ENOENT) { /* bcm2835 didn't have a clock reference in the DT. */ ret = 0; v3d->clk = NULL; } else { if (ret != -EPROBE_DEFER) dev_err(dev, "Failed to get V3D clock: %d\n", ret); return ret; } } if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); return -EINVAL; } ret = clk_prepare_enable(v3d->clk); if (ret != 0) return ret; ret = vc4_allocate_bin_bo(drm); if (ret) { clk_disable_unprepare(v3d->clk); return ret; } /* Reset the binner overflow address/size at setup, to be sure * we don't reuse an old one. */ V3D_WRITE(V3D_BPOA, 0); V3D_WRITE(V3D_BPOS, 0); vc4_v3d_init_hw(drm); ret = drm_irq_install(drm, platform_get_irq(pdev, 0)); if (ret) { DRM_ERROR("Failed to install IRQ handler\n"); return ret; } pm_runtime_set_active(dev); pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */ pm_runtime_enable(dev); return 0; }