static struct pipe_fence_handle *
radeon_drm_cs_get_next_fence(struct radeon_cmdbuf *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pipe_fence_handle *fence = NULL;

    if (cs->next_fence) {
        radeon_fence_reference(&fence, cs->next_fence);
        return fence;
    }

    fence = radeon_cs_create_fence(rcs);
    if (!fence)
        return NULL;

    radeon_fence_reference(&cs->next_fence, fence);
    return fence;
}
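/* Illustrative sketch (hypothetical helper, not part of the original winsys
 * code): because the fence is cached in cs->next_fence, repeated queries
 * before a flush return references to the same fence object, and each call
 * adds one reference that the caller must release. */
static void example_query_fence_twice(struct radeon_cmdbuf *rcs)
{
    struct pipe_fence_handle *a = radeon_drm_cs_get_next_fence(rcs);
    struct pipe_fence_handle *b = radeon_drm_cs_get_next_fence(rcs);

    /* a and b point at the same fence; release each reference separately. */
    radeon_fence_reference(&a, NULL);
    radeon_fence_reference(&b, NULL);
}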
static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **pfence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= GFX6) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
         * r6xx, requires at least 4 dw alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (pfence || cs->csc->num_slab_buffers) {
        struct pipe_fence_handle *fence;

        if (cs->next_fence) {
            fence = cs->next_fence;
            cs->next_fence = NULL;
        } else {
            fence = radeon_cs_create_fence(rcs);
        }

        if (fence) {
            if (pfence)
                radeon_fence_reference(pfence, fence);

            mtx_lock(&cs->ws->bo_fence_lock);
            for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) {
                struct radeon_bo *bo = cs->csc->slab_buffers[i].bo;
                p_atomic_inc(&bo->num_active_ioctls);
                radeon_bo_slab_fence(bo, (struct radeon_bo *)fence);
            }
            mtx_unlock(&cs->ws->bo_fence_lock);

            radeon_fence_reference(&fence, NULL);
        }
    } else {
        radeon_fence_reference(&cs->next_fence, NULL);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw &&
        !debug_get_option_noop()) {
        unsigned i, num_relocs;

        num_relocs = cs->cst->num_relocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < num_relocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.r600_has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & PIPE_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & PIPE_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    if (cs->ring_type == RING_GFX)
        cs->ws->num_gfx_IBs++;
    else if (cs->ring_type == RING_DMA)
        cs->ws->num_sdma_IBs++;

    return 0;
}
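/* Illustrative sketch (hypothetical helper, not part of the original file):
 * a caller submits the accumulated commands asynchronously and keeps the
 * fence that the submission will signal.  PIPE_FLUSH_ASYNC defers the wait
 * to the submission thread; passing a non-NULL pfence makes the flush reuse
 * the fence cached by radeon_drm_cs_get_next_fence(), or create a new one. */
static struct pipe_fence_handle *example_flush_async(struct radeon_cmdbuf *rcs)
{
    struct pipe_fence_handle *fence = NULL;

    radeon_drm_cs_flush(rcs, PIPE_FLUSH_ASYNC, &fence);

    /* The caller owns this reference and eventually releases it with
     * radeon_fence_reference(&fence, NULL). */
    return fence;
}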
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
         * r6xx, requires at least 4 dw alignment to avoid a hw bug.
         * hawaii with old firmware needs type2 nop packet.
         * accel_working2 with value 3 indicates the new firmware.
         */
        if (cs->ws->info.chip_class <= SI ||
            (cs->ws->info.family == CHIP_HAWAII &&
             cs->ws->accel_working2 < 3)) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS &&
        !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}