static struct dma_fence * etnaviv_sched_dependency(struct drm_sched_job *sched_job, struct drm_sched_entity *entity) { struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job); struct dma_fence *fence; int i; if (unlikely(submit->in_fence)) { fence = submit->in_fence; submit->in_fence = NULL; if (!dma_fence_is_signaled(fence)) return fence; dma_fence_put(fence); } for (i = 0; i < submit->nr_bos; i++) { struct etnaviv_gem_submit_bo *bo = &submit->bos[i]; int j; if (bo->excl) { fence = bo->excl; bo->excl = NULL; if (!dma_fence_is_signaled(fence)) return fence; dma_fence_put(fence); } for (j = 0; j < bo->nr_shared; j++) { if (!bo->shared[j]) continue; fence = bo->shared[j]; bo->shared[j] = NULL; if (!dma_fence_is_signaled(fence)) return fence; dma_fence_put(fence); } kfree(bo->shared); bo->nr_shared = 0; bo->shared = NULL; } return NULL; }
/*
 * Store @fence in the per-ring fence array of @ctx at the slot selected by
 * the ring's current sequence number, then advance that sequence number.
 *
 * The slot index is the sequence number masked with (amdgpu_sched_jobs - 1).
 * Any fence previously held in that slot must already be signaled (BUG
 * otherwise); its reference is dropped after the new fence is installed.
 * A reference on @fence is taken for the array.
 *
 * If @handler is non-NULL it receives the sequence number used for @fence.
 * Always returns 0.
 */
int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
			 struct dma_fence *fence, uint64_t *handler)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	uint64_t seq = cring->sequence;
	unsigned slot = seq & (amdgpu_sched_jobs - 1);
	struct dma_fence *prev = cring->fences[slot];

	BUG_ON(prev && !dma_fence_is_signaled(prev));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	cring->fences[slot] = fence;
	cring->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(prev);

	if (handler)
		*handler = seq;

	return 0;
}
/*
 * Read register @reg by emitting a read-register packet on the KIQ ring.
 *
 * The packet and a fence are emitted and committed while holding
 * adev->virt.lock_kiq; the fence is then waited on (non-interruptibly)
 * outside the lock. A wait failure is only logged. The value returned is
 * whatever is found in the writeback slot at adev->virt.reg_val_offs.
 */
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct dma_fence *f;
	signed long r;

	BUG_ON(!ring->funcs->emit_rreg);

	mutex_lock(&adev->virt.lock_kiq);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg);
	amdgpu_fence_emit(ring, &f);
	amdgpu_ring_commit(ring);
	mutex_unlock(&adev->virt.lock_kiq);

	r = dma_fence_wait(f, false);
	if (r)
		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
	dma_fence_put(f);

	return adev->wb.wb[adev->virt.reg_val_offs];
}
static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { struct dma_fence *fence = NULL; struct amdgpu_job *job; int r; if (!sched_job) { DRM_ERROR("job is null\n"); return NULL; } job = to_amdgpu_job(sched_job); BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); amdgpu_job_free_resources(job); return fence; }
/* * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL): * * Signal and consume a fence ealier attached to a vGEM handle using * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH). * * All fences must be signaled within 10s of attachment or otherwise they * will automatically expire (and a vgem_fence_signal_ioctl returns -ETIMEDOUT). * * Signaling a fence indicates to all consumers of the dma-buf that the * client has completed the operation associated with the fence, and that the * buffer is then ready for consumption. * * If the fence does not exist (or has already been signaled by the client), * vgem_fence_signal_ioctl returns -ENOENT. */ int vgem_fence_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct vgem_file *vfile = file->driver_priv; struct drm_vgem_fence_signal *arg = data; struct dma_fence *fence; int ret = 0; if (arg->flags) return -EINVAL; mutex_lock(&vfile->fence_mutex); fence = idr_replace(&vfile->fence_idr, NULL, arg->fence); mutex_unlock(&vfile->fence_mutex); if (!fence) return -ENOENT; if (IS_ERR(fence)) return PTR_ERR(fence); if (dma_fence_is_signaled(fence)) ret = -ETIMEDOUT; dma_fence_signal(fence); dma_fence_put(fence); return ret; }
/** * amdgpu_fence_driver_fini - tear down the fence driver * for all possible rings. * * @adev: amdgpu device pointer * * Tear down the fence driver for all possible rings (all asics). */ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) { unsigned i, j; int r; for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; if (!ring || !ring->fence_drv.initialized) continue; r = amdgpu_fence_wait_empty(ring); if (r) { /* no need to trigger GPU reset as we are unloading */ amdgpu_fence_driver_force_completion(adev); } amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); amd_sched_fini(&ring->sched); del_timer_sync(&ring->fence_drv.fallback_timer); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) dma_fence_put(ring->fence_drv.fences[j]); kfree(ring->fence_drv.fences); ring->fence_drv.fences = NULL; ring->fence_drv.initialized = false; } }
/** * si_dma_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information * * Test a simple IB in the DMA ring (VI). * Returns 0 on success, error on failure. */ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; struct dma_fence *f = NULL; unsigned index; u32 tmp = 0; u64 gpu_addr; long r; r = amdgpu_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err0; } ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); ib.ptr[1] = lower_32_bits(gpu_addr); ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff; ib.ptr[3] = 0xDEADBEEF; ib.length_dw = 4; r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err1; r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) { DRM_INFO("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: amdgpu_wb_free(adev, index); return r; }
/*
 * Store @fence in the fence array of the context entity wrapping @entity,
 * at the slot selected by the entity's current sequence number, then advance
 * that sequence number.
 *
 * The slot index is the sequence number masked with (amdgpu_sched_jobs - 1).
 * Any fence previously held in that slot must already be signaled (BUG
 * otherwise); its reference is dropped after the new fence is installed.
 * A reference on @fence is taken for the array.
 *
 * If @handle is non-NULL it receives the sequence number used for @fence.
 */
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			  struct drm_sched_entity *entity,
			  struct dma_fence *fence, uint64_t *handle)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	unsigned slot = seq & (amdgpu_sched_jobs - 1);
	struct dma_fence *prev = centity->fences[slot];

	BUG_ON(prev && !dma_fence_is_signaled(prev));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[slot] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(prev);

	if (handle)
		*handle = seq;
}
int tegra_uapi_syncpt_wait(struct drm_device *drm, void *data, struct drm_file *file) { struct drm_tegra_syncpt_wait *args = data; struct tegra_drm_file *fpriv = file->driver_priv; struct tegra_drm *tegra = drm->dev_private; struct tegra_drm_context_v1 *context; struct dma_fence *fence = NULL; int ret; spin_lock(&tegra->context_lock); context = idr_find(&fpriv->uapi_v1_contexts, args->thresh); if (context) fence = drm_syncobj_fence_get(context->syncobj); spin_unlock(&tegra->context_lock); if (!context) return -EINVAL; if (fence) { ret = dma_fence_wait_timeout(fence, false, msecs_to_jiffies(args->timeout)); dma_fence_put(fence); if (!ret) return -ETIMEDOUT; if (ret < 0) return ret; } return 0; }
/** * drm_writeback_signal_completion - Signal the completion of a writeback job * @wb_connector: The writeback connector whose job is complete * @status: Status code to set in the writeback out_fence (0 for success) * * Drivers should call this to signal the completion of a previously queued * writeback job. It should be called as soon as possible after the hardware * has finished writing, and may be called from interrupt context. * It is the driver's responsibility to ensure that for a given connector, the * hardware completes writeback jobs in the same order as they are queued. * * Unless the driver is holding its own reference to the framebuffer, it must * not be accessed after calling this function. * * See also: drm_writeback_queue_job() */ void drm_writeback_signal_completion(struct drm_writeback_connector *wb_connector, int status) { unsigned long flags; struct drm_writeback_job *job; spin_lock_irqsave(&wb_connector->job_lock, flags); job = list_first_entry_or_null(&wb_connector->job_queue, struct drm_writeback_job, list_entry); if (job) { list_del(&job->list_entry); if (job->out_fence) { if (status) dma_fence_set_error(job->out_fence, status); dma_fence_signal(job->out_fence); dma_fence_put(job->out_fence); } } spin_unlock_irqrestore(&wb_connector->job_lock, flags); if (WARN_ON(!job)) return; INIT_WORK(&job->cleanup_work, cleanup_work); queue_work(system_long_wq, &job->cleanup_work); }
/*
 * Scheduler free callback: release the amdgpu_job that embeds @s_job.
 *
 * Drops the job's fence reference, frees its sync object and then the job
 * memory itself.
 */
static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
{
	struct amdgpu_job *job;

	job = container_of(s_job, struct amdgpu_job, base);

	dma_fence_put(job->fence);
	amdgpu_sync_free(&job->sync);

	kfree(job);
}
/*
 * Free @job directly: release its resources, drop its fence reference, free
 * its sync object and finally the job memory itself.
 */
void amdgpu_job_free(struct amdgpu_job *job)
{
	amdgpu_job_free_resources(job);

	dma_fence_put(job->fence);
	amdgpu_sync_free(&job->sync);

	kfree(job);
}
/*
 * kref release function for a sync_file.
 *
 * If polling was enabled on the wrapped fence (POLL_ENABLED bit set in the
 * fence flags), the sync_file's callback is removed from it first. The fence
 * reference is then dropped and the sync_file freed.
 */
static void sync_file_free(struct kref *kref)
{
	struct sync_file *sync_file;

	sync_file = container_of(kref, struct sync_file, kref);

	if (test_bit(POLL_ENABLED, &sync_file->fence->flags)) {
		dma_fence_remove_callback(sync_file->fence,
					  &sync_file->cb);
	}

	dma_fence_put(sync_file->fence);
	kfree(sync_file);
}
/*
 * File release handler for a sync_file.
 *
 * The sync_file is taken from file->private_data. If polling was enabled on
 * the wrapped fence (POLL_ENABLED bit set in the fence flags), the
 * sync_file's callback is removed from it first. The fence reference is then
 * dropped and the sync_file freed. @inode is not used. Always returns 0.
 */
static int sync_file_release(struct inode *inode, struct file *file)
{
	struct sync_file *sf = file->private_data;

	if (test_bit(POLL_ENABLED, &sf->fence->flags)) {
		dma_fence_remove_callback(sf->fence, &sf->cb);
	}

	dma_fence_put(sf->fence);
	kfree(sf);

	return 0;
}
/*
 * Submit a caller-provided IB on behalf of amdkfd and wait for it.
 *
 * @engine selects the ring: the first compute ring for KGD_ENGINE_MEC1, or
 * the ring of SDMA instance 0 / 1 for KGD_ENGINE_SDMA1 / KGD_ENGINE_SDMA2.
 * A job with a single IB is allocated, the IB is pointed at @gpu_addr /
 * @ib_cmd with length @ib_len, and @vmid is stored in the job. The IB is
 * scheduled and its fence is waited on non-interruptibly. The job is freed
 * before returning on both the success and the schedule-failure paths.
 *
 * Returns 0 on success, -EINVAL for an unknown engine, or the error from
 * job allocation, IB scheduling or the fence wait.
 */
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
			    uint32_t vmid, uint64_t gpu_addr,
			    uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct dma_fence *f = NULL;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		return -EINVAL;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		return ret;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;

	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
	if (ret)
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
	else
		ret = dma_fence_wait(f, false);

	dma_fence_put(f);
	amdgpu_job_free(job);

	return ret;
}
/**
 * __drm_atomic_helper_plane_destroy_state - release plane state
 * @state: plane state object to release
 *
 * Drops the references held by @state on its framebuffer, fence and commit
 * (each only when set) and on its damage-clips property blob. The memory of
 * the plane state itself is not freed, which makes this helper suitable for
 * drivers that subclass the plane state.
 */
void __drm_atomic_helper_plane_destroy_state(struct drm_plane_state *state)
{
	if (state->fb) {
		drm_framebuffer_put(state->fb);
	}

	if (state->fence) {
		dma_fence_put(state->fence);
	}

	if (state->commit) {
		drm_crtc_commit_put(state->commit);
	}

	drm_property_blob_put(state->fb_damage_clips);
}
/**
 * uvd_v6_0_enc_get_destroy_msg - generate a UVD ENC destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: session handle to use
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so.
 *
 * Builds a 16-dword IB (13 message dwords, zero padded) and submits it
 * directly on @ring. On success, *@fence (when @fence is non-NULL) receives
 * a reference to the submission fence. Returns 0 on success or the error
 * from job allocation / submission; the job is freed if submission fails.
 */
static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
					uint32_t handle,
					struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	unsigned n = 0;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->ptr[n++] = 0x00000018;
	ib->ptr[n++] = 0x00000001; /* session info */
	ib->ptr[n++] = handle;
	ib->ptr[n++] = 0x00010000;
	ib->ptr[n++] = upper_32_bits(dummy);
	ib->ptr[n++] = dummy;

	ib->ptr[n++] = 0x00000014;
	ib->ptr[n++] = 0x00000002; /* task info */
	ib->ptr[n++] = 0x0000001c;
	ib->ptr[n++] = 0x00000001;
	ib->ptr[n++] = 0x00000000;

	ib->ptr[n++] = 0x00000008;
	ib->ptr[n++] = 0x08000002; /* op close session */

	ib->length_dw = n;

	/* Zero the remainder of the IB; length_dw stays at the message size. */
	for (i = n; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r) {
		amdgpu_job_free(job);
		return r;
	}

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;
}
static void submit_cleanup(struct kref *kref) { struct etnaviv_gem_submit *submit = container_of(kref, struct etnaviv_gem_submit, refcount); unsigned i; if (submit->runtime_resumed) pm_runtime_put_autosuspend(submit->gpu->dev); if (submit->cmdbuf.suballoc) etnaviv_cmdbuf_free(&submit->cmdbuf); for (i = 0; i < submit->nr_bos; i++) { struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; /* unpin all objects */ if (submit->bos[i].flags & BO_PINNED) { etnaviv_gem_mapping_unreference(submit->bos[i].mapping); atomic_dec(&etnaviv_obj->gpu_active); submit->bos[i].mapping = NULL; submit->bos[i].flags &= ~BO_PINNED; } /* if the GPU submit failed, objects might still be locked */ submit_unlock_object(submit, i); drm_gem_object_put_unlocked(&etnaviv_obj->base); } wake_up_all(&submit->gpu->fence_event); if (submit->in_fence) dma_fence_put(submit->in_fence); if (submit->out_fence) dma_fence_put(submit->out_fence); kfree(submit->pmrs); kfree(submit); }
/*
 * sw_fence notification callback for a clflush operation.
 *
 * On FENCE_COMPLETE the clflush work item is scheduled; on FENCE_FREE the
 * reference on the embedded dma fence is dropped. Any other state is
 * ignored. Always returns NOTIFY_DONE.
 */
static int __i915_sw_fence_call
i915_clflush_notify(struct i915_sw_fence *fence,
		    enum i915_sw_fence_notify state)
{
	struct clflush *clflush = container_of(fence, typeof(*clflush), wait);

	if (state == FENCE_COMPLETE)
		schedule_work(&clflush->work);
	else if (state == FENCE_FREE)
		dma_fence_put(&clflush->dma);

	return NOTIFY_DONE;
}
/** * amdgpu_fence_process - check for fence activity * * @ring: pointer to struct amdgpu_ring * * Checks the current fence value and calculates the last * signalled fence value. Wakes the fence queue if the * sequence number has increased. */ void amdgpu_fence_process(struct amdgpu_ring *ring) { struct amdgpu_fence_driver *drv = &ring->fence_drv; uint32_t seq, last_seq; int r; do { last_seq = atomic_read(&ring->fence_drv.last_seq); seq = amdgpu_fence_read(ring); } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); if (seq != ring->fence_drv.sync_seq) amdgpu_fence_schedule_fallback(ring); if (unlikely(seq == last_seq)) return; last_seq &= drv->num_fences_mask; seq &= drv->num_fences_mask; do { struct dma_fence *fence, **ptr; ++last_seq; last_seq &= drv->num_fences_mask; ptr = &drv->fences[last_seq]; /* There is always exactly one thread signaling this fence slot */ fence = rcu_dereference_protected(*ptr, 1); RCU_INIT_POINTER(*ptr, NULL); if (!fence) continue; r = dma_fence_signal(fence); if (!r) DMA_FENCE_TRACE(fence, "signaled from irq context\n"); else BUG(); dma_fence_put(fence); } while (last_seq != seq); }
static int nouveau_fence_signal(struct nouveau_fence *fence) { int drop = 0; dma_fence_signal_locked(&fence->base); list_del(&fence->head); rcu_assign_pointer(fence->channel, NULL); if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { struct nouveau_fence_chan *fctx = nouveau_fctx(fence); if (!--fctx->notify_ref) drop = 1; } dma_fence_put(&fence->base); return drop; }
/*
 * kref release function for an amdgpu_ctx (entity-based layout).
 *
 * Returns immediately, without freeing anything, when the context has no
 * device. Otherwise drops every fence reference held in each entity's fence
 * array, frees the fence storage and the entity array, destroys the context
 * mutex and frees the context.
 */
static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	unsigned num_entities = amdgput_ctx_total_num_entities();
	struct amdgpu_device *adev = ctx->adev;
	unsigned ent, slot;

	if (!adev)
		return;

	for (ent = 0; ent < num_entities; ++ent) {
		for (slot = 0; slot < amdgpu_sched_jobs; ++slot)
			dma_fence_put(ctx->entities[0][ent].fences[slot]);
	}

	kfree(ctx->fences);
	kfree(ctx->entities[0]);

	mutex_destroy(&ctx->lock);

	kfree(ctx);
}
/*
 * Work item that performs the deferred clflush of an object.
 *
 * Pins the object's pages, flushes them and unpins again; if pinning fails
 * an error is logged and the flush is skipped. In both cases the object
 * reference is dropped and the clflush's dma fence is signaled and released.
 */
static void i915_clflush_work(struct work_struct *work)
{
	struct clflush *clflush = container_of(work, typeof(*clflush), work);
	struct drm_i915_gem_object *obj = clflush->obj;

	if (i915_gem_object_pin_pages(obj)) {
		DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
	} else {
		__i915_do_clflush(obj);
		i915_gem_object_unpin_pages(obj);
	}

	i915_gem_object_put(obj);

	dma_fence_signal(&clflush->dma);
	dma_fence_put(&clflush->dma);
}
/*
 * Tear down @ctx: drop every pending-CS fence reference and, for any context
 * other than the kernel one (HL_KERNEL_ASID_ID), finalize its VM state and
 * free its ASID.
 */
static void hl_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	int slot;

	/*
	 * If we arrived here, there are no jobs waiting for this context
	 * on its queues so we can safely remove it.
	 * This is because for each CS, we increment the ref count and for
	 * every CS that was finished we decrement it and we won't arrive
	 * to this function unless the ref count is 0
	 */

	for (slot = 0; slot < HL_MAX_PENDING_CS; slot++)
		dma_fence_put(ctx->cs_pending[slot]);

	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	hl_vm_ctx_fini(ctx);
	hl_asid_free(hdev, ctx->asid);
}
static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); struct amdgpu_device *adev = ctx->adev; unsigned i, j; if (!adev) return; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) for (j = 0; j < amdgpu_sched_jobs; ++j) dma_fence_put(ctx->rings[i].fences[j]); kfree(ctx->fences); ctx->fences = NULL; amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); mutex_destroy(&ctx->lock); kfree(ctx); }
/** * amdgpu_fence_wait_empty - wait for all fences to signal * * @adev: amdgpu device pointer * @ring: ring index the fence is associated with * * Wait for all fences on the requested ring to signal (all asics). * Returns 0 if the fences have passed, error for all other cases. */ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) { uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq); struct dma_fence *fence, **ptr; int r; if (!seq) return 0; ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; rcu_read_lock(); fence = rcu_dereference(*ptr); if (!fence || !dma_fence_get_rcu(fence)) { rcu_read_unlock(); return 0; } rcu_read_unlock(); r = dma_fence_wait(fence, false); dma_fence_put(fence); return r; }
/*
 * Write value @v to register @reg by emitting a write-register packet on the
 * KIQ ring.
 *
 * The write is bracketed by an HDP flush and an HDP invalidate; the packets
 * and a fence are emitted and committed while holding adev->virt.lock. The
 * fence is then waited on (non-interruptibly) outside the lock; a wait
 * failure is only logged.
 */
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct dma_fence *f;
	signed long r;

	BUG_ON(!ring->funcs->emit_wreg);

	mutex_lock(&adev->virt.lock);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_hdp_flush(ring);
	amdgpu_ring_emit_wreg(ring, reg, v);
	amdgpu_ring_emit_hdp_invalidate(ring);
	amdgpu_fence_emit(ring, &f);
	amdgpu_ring_commit(ring);
	mutex_unlock(&adev->virt.lock);

	r = dma_fence_wait(f, false);
	if (r)
		DRM_ERROR("wait for kiq fence error: %ld.\n", r);

	dma_fence_put(f);
}
/**
 * uvd_v6_0_enc_ring_test_ib - test if UVD ENC IBs are working
 *
 * @ring: the engine to test on
 * @timeout: timeout handed to dma_fence_wait_timeout() for the destroy fence
 *
 * Submits a create message followed by a destroy message for session
 * handle 1 and waits for the destroy message's fence. Returns 0 on success,
 * -ETIMEDOUT if the wait timed out, or the error from a failing step.
 */
static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	long r;

	r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
	if (r)
		goto error;

	r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r > 0)
		r = 0;
	else if (r == 0)
		r = -ETIMEDOUT;

error:
	dma_fence_put(fence);
	return r;
}
/*
 * Per-entry idr callback: signal the fence stored in @p and drop its
 * reference. @id and @data are not used. Always returns 0.
 */
static int __vgem_fence_idr_fini(int id, void *p, void *data)
{
	struct dma_fence *fence = p;

	dma_fence_signal(fence);
	dma_fence_put(fence);

	return 0;
}
static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job) { struct v3d_job *job = to_v3d_job(sched_job); struct v3d_exec_info *exec = job->exec; enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; struct v3d_dev *v3d = exec->v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; unsigned long irqflags; if (unlikely(job->base.s_fence->finished.error)) return NULL; /* Lock required around bin_job update vs * v3d_overflow_mem_work(). */ spin_lock_irqsave(&v3d->job_lock, irqflags); if (q == V3D_BIN) { v3d->bin_job = job->exec; /* Clear out the overflow allocation, so we don't * reuse the overflow attached to a previous job. */ V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); } else { v3d->render_job = job->exec; } spin_unlock_irqrestore(&v3d->job_lock, irqflags); /* Can we avoid this flush when q==RENDER? We need to be * careful of scheduling, though -- imagine job0 rendering to * texture and job1 reading, and them being executed as bin0, * bin1, render0, render1, so that render1's flush at bin time * wasn't enough. */ v3d_invalidate_caches(v3d); fence = v3d_fence_create(v3d, q); if (IS_ERR(fence)) return NULL; if (job->done_fence) dma_fence_put(job->done_fence); job->done_fence = dma_fence_get(fence); trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno, job->start, job->end); if (q == V3D_BIN) { if (exec->qma) { V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma); V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms); } if (exec->qts) { V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, V3D_CLE_CT0QTS_ENABLE | exec->qts); } } else { /* XXX: Set the QCFG */ } /* Set the current and end address of the control list. * Writing the end register is what starts the job. */ V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start); V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end); return fence; }