Example #1
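The destroy hook for a winsys command stream: it waits for any in-flight flush, destroys the completion semaphore, cleans up and destroys both command-stream contexts, and decrements the winsys CS count before freeing the wrapper.
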
static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}
Example #2
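A later revision of the same destroy hook, after the switch to struct radeon_cmdbuf and util_queue fences; it additionally drops the reference to the pending next_fence.
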
static void radeon_drm_cs_destroy(struct radeon_cmdbuf *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    util_queue_fence_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    radeon_fence_reference(&cs->next_fence, NULL);
    FREE(cs);
}
Example #3
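The submission path: it hands the chunked CS to the kernel through the DRM_RADEON_CS ioctl, dumps the raw dwords when the kernel rejects the stream and RADEON_DUMP_CS is set, records a lockup trace if a trace buffer exists, then releases each buffer's active-ioctl count and resets the context.
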
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}
Example #4
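The validation callback: the CS is considered valid only while GART and VRAM usage stay below 80% of the reported sizes. On failure, relocations added since the last successful validation are dropped and the stream is flushed asynchronously, or merely cleaned up if no validated relocations remain.
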
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove recently added relocations. Validation failed with them
         * included and the CS is about to be flushed because of that.
         * Keep only the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}
Example #5
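A threaded variant of the submission path, shaped as a util_queue job; it reports -ENOMEM explicitly and releases active-ioctl counts for both relocated buffers and slab buffers.
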
void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
{
    struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
    unsigned i;
    int r;

    r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs));
    if (r) {
        if (r == -ENOMEM)
            fprintf(stderr, "radeon: Not enough memory for command submission.\n");
        else if (debug_get_bool_option("RADEON_DUMP_CS", false)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information (%i).\n", r);
        }
    }

    for (i = 0; i < csc->num_relocs; i++)
        p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);
    for (i = 0; i < csc->num_slab_buffers; i++)
        p_atomic_dec(&csc->slab_buffers[i].bo->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}
Example #6
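Context teardown: reset the context, then free the relocation and slab-buffer arrays it owns.
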
static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->slab_buffers);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}
Example #7
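An early flush implementation: after warning on overflow and syncing any previous flush, it flips the recording context (csc) with the submission context (cst), fills in the chunk sizes and ring flags, and either wakes the flush thread or submits synchronously.
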
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(cs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty and hasn't overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        cs->cst->flags[0] = 0;
        cs->cst->flags[1] = RADEON_CS_RING_GFX;
        cs->cst->cs.num_chunks = 2;
        if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
            cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->cs.num_chunks = 3;
        }
        if (cs->ws->info.r600_virtual_address) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
            cs->cst->cs.num_chunks = 3;
        }
        if (flags & RADEON_FLUSH_COMPUTE) {
            cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
            cs->cst->cs.num_chunks = 3;
        }

        if (cs->thread &&
            (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            pipe_semaphore_signal(&cs->flush_queued);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
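
The csc/cst flip above is the core of the winsys double buffering: one context is recorded into while the other belongs to the submission thread. A minimal standalone sketch of the pattern (generic C with hypothetical names, not the Mesa API):

#include <stdint.h>

/* Two command-stream contexts: one being recorded, one being submitted.
 * Hypothetical types for illustration only. */
struct ctx { uint32_t *buf; unsigned cdw; };

struct cs {
    struct ctx *csc;   /* context currently being recorded into */
    struct ctx *cst;   /* context owned by the submission thread */
};

static void flush(struct cs *cs,
                  void (*wait_prev)(struct cs *),
                  void (*submit_async)(struct ctx *))
{
    /* The previous submission may still be in flight; wait until
     * cs->cst can be reused (the role of radeon_drm_cs_sync_flush). */
    wait_prev(cs);

    /* Flip: the filled context goes to the worker while recording
     * resumes immediately in the drained one. */
    struct ctx *tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    if (cs->cst->cdw)
        submit_async(cs->cst);   /* may run on another thread */

    /* Prepare a new, empty CS in the recording context. */
    cs->csc->cdw = 0;
}

The sync before the swap is what keeps the scheme safe: cst may still be owned by the worker from the previous flush.
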
Example #8
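The matching destroy hook from the hand-rolled flush-thread era: it sets kill_thread, wakes the thread, waits for it to finish, and then destroys both semaphores and both contexts.
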
static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    radeon_drm_cs_sync_flush(cs);
    if (cs->thread) {
        cs->kill_thread = 1;
        pipe_semaphore_signal(&cs->flush_queued);
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_thread_wait(cs->thread);
    }
    pipe_semaphore_destroy(&cs->flush_queued);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}
Example #9
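An older flush variant: there is no overflow guard yet, and the kernel flags are still a single field rather than an array.
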
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    radeon_drm_cs_sync_flush(cs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty, emit it in a separate thread. */
    if (cs->base.cdw) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
            cs->cst->cs.num_chunks = 3;
            cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS;
        } else {
            cs->cst->cs.num_chunks = 2;
        }

        if (cs->thread &&
            (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            pipe_semaphore_signal(&cs->flush_queued);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
Example #10
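A flush that first pads the IB to each ring's alignment requirement (8 DWs, with chip-specific NOP encodings), records a trace id on the submission context, and programs per-ring chunk flags before submitting through the queue or synchronously.
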
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (flags & RADEON_FLUSH_COMPUTE) {
            if (cs->ws->info.chip_class <= SI) {
                while (rcs->cdw & 7)
                    OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
            } else {
                while (rcs->cdw & 7)
                    OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
            }
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        }
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and hasn't overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            radeon_drm_ws_queue_cs(cs->ws, cs);
        } else {
            pipe_mutex_lock(cs->ws->cs_stack_lock);
            if (cs->ws->thread) {
                while (p_atomic_read(&cs->ws->ncs)) {
                    pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock);
                }
            }
            pipe_mutex_unlock(cs->ws->cs_stack_lock);
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
Example #11
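The modern flush: padding is selected via gfx_ib_pad_with_type2, a fence is created and attached to every slab buffer under bo_fence_lock, submission goes through util_queue, and per-ring IB counters are bumped at the end.
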
static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **pfence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= GFX6) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (pfence || cs->csc->num_slab_buffers) {
        struct pipe_fence_handle *fence;

        if (cs->next_fence) {
            fence = cs->next_fence;
            cs->next_fence = NULL;
        } else {
            fence = radeon_cs_create_fence(rcs);
        }

        if (fence) {
            if (pfence)
                radeon_fence_reference(pfence, fence);

            mtx_lock(&cs->ws->bo_fence_lock);
            for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) {
                struct radeon_bo *bo = cs->csc->slab_buffers[i].bo;
                p_atomic_inc(&bo->num_active_ioctls);
                radeon_bo_slab_fence(bo, (struct radeon_bo *)fence);
            }
            mtx_unlock(&cs->ws->bo_fence_lock);

            radeon_fence_reference(&fence, NULL);
        }
    } else {
        radeon_fence_reference(&cs->next_fence, NULL);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty and hasn't overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
        unsigned i, num_relocs;

        num_relocs = cs->cst->num_relocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < num_relocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.r600_has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & PIPE_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & PIPE_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    if (cs->ring_type == RING_GFX)
        cs->ws->num_gfx_IBs++;
    else if (cs->ring_type == RING_DMA)
        cs->ws->num_sdma_IBs++;
    return 0;
}
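
In this version the async path relies on the job fence: util_queue_add_job takes cs->flush_completed as the fence to signal once radeon_drm_cs_emit_ioctl_oneshot has run on the queue thread, and a later radeon_drm_cs_sync_flush can wait on it. A rough pthreads sketch of such a fence (hypothetical code, not Mesa's util_queue_fence implementation):

#include <pthread.h>
#include <stdbool.h>

/* A rough fence sketch: starts signaled, is reset when a job is
 * queued, and is signaled again by the worker once the job has run. */
struct fence {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    bool signaled;
};

static void fence_init(struct fence *f)
{
    pthread_mutex_init(&f->lock, NULL);
    pthread_cond_init(&f->cond, NULL);
    f->signaled = true;
}

/* Queue side: called when the job is enqueued. */
static void fence_reset(struct fence *f)
{
    pthread_mutex_lock(&f->lock);
    f->signaled = false;
    pthread_mutex_unlock(&f->lock);
}

/* Worker side: called after the job has executed. */
static void fence_signal(struct fence *f)
{
    pthread_mutex_lock(&f->lock);
    f->signaled = true;
    pthread_cond_broadcast(&f->cond);
    pthread_mutex_unlock(&f->lock);
}

/* Sync-flush side: block until the submission has completed. */
static void fence_wait(struct fence *f)
{
    pthread_mutex_lock(&f->lock);
    while (!f->signaled)
        pthread_cond_wait(&f->cond, &f->lock);
    pthread_mutex_unlock(&f->lock);
}
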
Example #12
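A transitional flush that creates the caller's fence up front and carries the Hawaii firmware workaround: GFX rings on old firmware (accel_working2 < 3) still need type-2 NOP padding.
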
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         * Hawaii with old firmware needs a type2 nop packet;
         * accel_working2 with value 3 indicates the new firmware.
         */
        if (cs->ws->info.chip_class <= SI ||
            (cs->ws->info.family == CHIP_HAWAII &&
             cs->ws->accel_working2 < 3)) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and hasn't overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}