Example #1
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(cs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is neither empty nor overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        cs->cst->flags[0] = 0;
        cs->cst->flags[1] = RADEON_CS_RING_GFX;
        cs->cst->cs.num_chunks = 2;
        if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
            cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->cs.num_chunks = 3;
        }
        if (cs->ws->info.r600_virtual_address) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
            cs->cst->cs.num_chunks = 3;
        }
        if (flags & RADEON_FLUSH_COMPUTE) {
            cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
            cs->cst->cs.num_chunks = 3;
        }

        if (cs->thread &&
            (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            pipe_semaphore_signal(&cs->flush_queued);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
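
The flush above relies on a double-buffered command-stream scheme: csc is the context currently being recorded while cst is the one being submitted, and the two are flipped on every flush so recording can continue while the previous IB goes to the kernel. A minimal, self-contained sketch of that pattern, with hypothetical names rather than the actual Mesa structures, might look like this:

/* Hypothetical double-buffered command-stream holder; the names are
 * illustrative, not the real Mesa types. */
struct cmd_context {
    unsigned cdw;                   /* dwords recorded so far */
    unsigned buf[16384];
};

struct cmd_stream {
    struct cmd_context ctx1, ctx2;
    struct cmd_context *current;    /* being recorded  (csc above) */
    struct cmd_context *submitted;  /* being submitted (cst above) */
};

/* Flip the recording and submission contexts, submit the just-recorded
 * one, and restart recording in the other, mirroring the swap above. */
void flush_cmd_stream(struct cmd_stream *cs,
                      void (*submit)(struct cmd_context *))
{
    struct cmd_context *tmp = cs->current;
    cs->current = cs->submitted;
    cs->submitted = tmp;

    if (cs->submitted->cdw)         /* only submit non-empty streams */
        submit(cs->submitted);

    cs->current->cdw = 0;           /* prepare a fresh stream */
}

The real flush also waits (radeon_drm_cs_sync_flush) for the previously submitted context before reusing it for recording; the sketch leaves that synchronization out.
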
Example #2
static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}
Example #3
static void radeon_drm_cs_destroy(struct radeon_cmdbuf *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    util_queue_fence_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    radeon_fence_reference(&cs->next_fence, NULL);
    FREE(cs);
}
Example #4
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    radeon_drm_cs_sync_flush(cs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty, emit it in a separate thread. */
    if (cs->base.cdw) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
            cs->cst->cs.num_chunks = 3;
            cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS;
        } else {
            cs->cst->cs.num_chunks = 2;
        }

        if (cs->thread &&
            (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            pipe_semaphore_signal(&cs->flush_queued);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
Example #5
static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    radeon_drm_cs_sync_flush(cs);
    if (cs->thread) {
        cs->kill_thread = 1;
        pipe_semaphore_signal(&cs->flush_queued);
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_thread_wait(cs->thread);
    }
    pipe_semaphore_destroy(&cs->flush_queued);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}
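
The shutdown handshake in this version (set kill_thread, signal flush_queued, wait on flush_completed, then join the thread) only makes sense next to the worker loop it tears down. A simplified reconstruction of such a loop, written with POSIX semaphores instead of the gallium pipe_semaphore/pipe_thread wrappers and using hypothetical names, could look like this:

#include <pthread.h>
#include <semaphore.h>

/* Hypothetical worker state; it mirrors the fields used by the destroy
 * path above but is not the actual Mesa structure. */
struct flush_worker {
    sem_t flush_queued;      /* posted when a CS is ready to submit      */
    sem_t flush_completed;   /* posted when a submission has finished    */
    int   kill_thread;       /* set by the destroy path before signaling */
};

static void submit_pending_cs(struct flush_worker *w) { (void)w; /* CS ioctl here */ }

/* Thread entry point, started with pthread_create() at CS creation time. */
static void *flush_thread_loop(void *arg)
{
    struct flush_worker *w = arg;

    for (;;) {
        sem_wait(&w->flush_queued);     /* sleep until work or shutdown */
        if (w->kill_thread)
            break;
        submit_pending_cs(w);           /* emit the queued command stream */
        sem_post(&w->flush_completed);  /* lets the sync-flush path return */
    }
    sem_post(&w->flush_completed);      /* unblock the waiting destroy path */
    return NULL;
}
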
Example #6
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 dw alignment to avoid a hw bug.
         */
        if (flags & RADEON_FLUSH_COMPUTE) {
            if (cs->ws->info.chip_class <= SI) {
                while (rcs->cdw & 7)
                    OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
            } else {
                while (rcs->cdw & 7)
                    OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
            }
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        }
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is neither empty nor overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) {
            cs->flush_started = 1;
            radeon_drm_ws_queue_cs(cs->ws, cs);
        } else {
            pipe_mutex_lock(cs->ws->cs_stack_lock);
            if (cs->ws->thread) {
                while (p_atomic_read(&cs->ws->ncs)) {
                    pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock);
                }
            }
            pipe_mutex_unlock(cs->ws->cs_stack_lock);
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}
Example #7
static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                                    unsigned flags, void *flush_ctx)
{
    struct radeon_bo *bo = radeon_bo(_buf);
    struct radeon_drm_cs *cs = flush_ctx;
    struct drm_radeon_gem_mmap args = {};
    void *ptr;

    /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
    if (!(flags & PB_USAGE_UNSYNCHRONIZED)) {
        /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
        if (flags & PB_USAGE_DONTBLOCK) {
            if (!(flags & PB_USAGE_CPU_WRITE)) {
                /* Mapping for read.
                 *
                 * Since we are mapping for read, we don't need to wait
                 * if the GPU is using the buffer for read too
                 * (neither one is changing it).
                 *
                 * Only check whether the buffer is being used for write. */
                if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
                    return NULL;
                }

                if (radeon_bo_is_busy((struct pb_buffer*)bo,
                                      RADEON_USAGE_WRITE)) {
                    return NULL;
                }
            } else {
                if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
                    return NULL;
                }

                if (radeon_bo_is_busy((struct pb_buffer*)bo,
                                      RADEON_USAGE_READWRITE)) {
                    return NULL;
                }
            }
        } else {
            if (!(flags & PB_USAGE_CPU_WRITE)) {
                /* Mapping for read.
                 *
                 * Since we are mapping for read, we don't need to wait
                 * if the GPU is using the buffer for read too
                 * (neither one is changing it).
                 *
                 * Only check whether the buffer is being used for write. */
                if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                    cs->flush_cs(cs->flush_data, 0);
                }
                radeon_bo_wait((struct pb_buffer*)bo,
                               RADEON_USAGE_WRITE);
            } else {
                /* Mapping for write. */
                if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                    cs->flush_cs(cs->flush_data, 0);
                } else {
                    /* Try to avoid busy-waiting in radeon_bo_wait. */
                    if (p_atomic_read(&bo->num_active_ioctls))
                        radeon_drm_cs_sync_flush(cs);
                }

                radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
            }
        }
    }

    /* Return the pointer if it's already mapped. */
    if (bo->ptr)
        return bo->ptr;

    /* Map the buffer. */
    pipe_mutex_lock(bo->map_mutex);
    /* Return the pointer if it's already mapped (in case of a race). */
    if (bo->ptr) {
        pipe_mutex_unlock(bo->map_mutex);
        return bo->ptr;
    }
    args.handle = bo->handle;
    args.offset = 0;
    args.size = (uint64_t)bo->size;
    if (drmCommandWriteRead(bo->rws->fd,
                            DRM_RADEON_GEM_MMAP,
                            &args,
                            sizeof(args))) {
        pipe_mutex_unlock(bo->map_mutex);
        fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
                bo, bo->handle);
        return NULL;
    }

    ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
               bo->rws->fd, args.addr_ptr);
    if (ptr == MAP_FAILED) {
        pipe_mutex_unlock(bo->map_mutex);
        fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
        return NULL;
    }
    bo->ptr = ptr;
    pipe_mutex_unlock(bo->map_mutex);

    return bo->ptr;
}
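
The tail of radeon_bo_map_internal is a double-checked lazy initialization: bo->ptr is tested once without the lock as a fast path, and again under map_mutex so that only one thread performs the mapping. A stripped-down sketch of just that locking pattern, using a plain file mapping instead of the GEM mmap ioctl and hypothetical names, is:

#include <pthread.h>
#include <stddef.h>
#include <sys/mman.h>

/* Hypothetical object with a lazily created CPU mapping. */
struct mapped_bo {
    pthread_mutex_t map_mutex;
    void           *ptr;       /* NULL until mapped */
    int             fd;
    size_t          size;
};

static void *bo_map_lazy(struct mapped_bo *bo)
{
    void *ptr;

    if (bo->ptr)                        /* fast path: already mapped */
        return bo->ptr;

    pthread_mutex_lock(&bo->map_mutex);
    if (bo->ptr) {                      /* re-check: another thread won the race */
        pthread_mutex_unlock(&bo->map_mutex);
        return bo->ptr;
    }

    ptr = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, bo->fd, 0);
    if (ptr == MAP_FAILED) {
        pthread_mutex_unlock(&bo->map_mutex);
        return NULL;
    }

    bo->ptr = ptr;                      /* publish under the lock */
    pthread_mutex_unlock(&bo->map_mutex);
    return bo->ptr;
}
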
Example #8
static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **pfence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= GFX6) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 dw alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (pfence || cs->csc->num_slab_buffers) {
        struct pipe_fence_handle *fence;

        if (cs->next_fence) {
            fence = cs->next_fence;
            cs->next_fence = NULL;
        } else {
            fence = radeon_cs_create_fence(rcs);
        }

        if (fence) {
            if (pfence)
                radeon_fence_reference(pfence, fence);

            mtx_lock(&cs->ws->bo_fence_lock);
            for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) {
                struct radeon_bo *bo = cs->csc->slab_buffers[i].bo;
                p_atomic_inc(&bo->num_active_ioctls);
                radeon_bo_slab_fence(bo, (struct radeon_bo *)fence);
            }
            mtx_unlock(&cs->ws->bo_fence_lock);

            radeon_fence_reference(&fence, NULL);
        }
    } else {
        radeon_fence_reference(&cs->next_fence, NULL);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is neither empty nor overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
        unsigned i, num_relocs;

        num_relocs = cs->cst->num_relocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < num_relocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.r600_has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & PIPE_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & PIPE_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    if (cs->ring_type == RING_GFX)
        cs->ws->num_gfx_IBs++;
    else if (cs->ring_type == RING_DMA)
        cs->ws->num_sdma_IBs++;
    return 0;
}
Example #9
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 dw alignment to avoid a hw bug.
         * hawaii with old firmware needs type2 nop packet.
         * accel_working2 with value 3 indicates the new firmware.
         */
        if (cs->ws->info.chip_class <= SI ||
            (cs->ws->info.family == CHIP_HAWAII &&
             cs->ws->accel_working2 < 3)) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is neither empty nor overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}
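
Several of the flush variants above pad the IB to an 8-dword boundary with a ring-specific NOP encoding before submission. The padding itself is simply "emit NOPs while the dword count is not a multiple of 8"; a minimal sketch with a hypothetical emit helper, reusing the NOP values shown in the examples:

#include <stdint.h>

/* Hypothetical fixed-size dword buffer standing in for the winsys CS. */
struct ib_buffer {
    uint32_t dw[16384];
    unsigned cdw;              /* current dword count */
};

static void ib_emit(struct ib_buffer *ib, uint32_t value)
{
    ib->dw[ib->cdw++] = value;
}

/* Pad the IB to an 8-dword boundary with the NOP encoding appropriate
 * for the target ring/firmware (values taken from the examples above):
 * 0x80000000 type2 NOP, 0xffff1000 type3 NOP, 0xf0000000 / 0x00000000
 * for the DMA ring depending on the chip generation. */
static void ib_pad_to_8_dw(struct ib_buffer *ib, uint32_t nop_packet)
{
    while (ib->cdw & 7)        /* cdw % 8 != 0 */
        ib_emit(ib, nop_packet);
}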