static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) { fprintf(stderr, "radeon: command stream overflowed\n"); } radeon_drm_cs_sync_flush(cs); /* Flip command streams. */ tmp = cs->csc; cs->csc = cs->cst; cs->cst = tmp; /* If the CS is not empty or overflowed, emit it in a separate thread. */ if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS) { unsigned i, crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.cdw; for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_GFX; cs->cst->cs.num_chunks = 2; if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; cs->cst->cs.num_chunks = 3; } if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_COMPUTE) { cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; cs->cst->cs.num_chunks = 3; } if (cs->thread && (flags & RADEON_FLUSH_ASYNC)) { cs->flush_started = 1; pipe_semaphore_signal(&cs->flush_queued); } else { radeon_drm_cs_emit_ioctl_oneshot(cs->cst); } } else { radeon_cs_context_cleanup(cs->cst); } /* Prepare a new CS. */ cs->base.buf = cs->csc->buf; cs->base.cdw = 0; }
static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); radeon_drm_cs_sync_flush(rcs); pipe_semaphore_destroy(&cs->flush_completed); radeon_cs_context_cleanup(&cs->csc1); radeon_cs_context_cleanup(&cs->csc2); p_atomic_dec(&cs->ws->num_cs); radeon_destroy_cs_context(&cs->csc1); radeon_destroy_cs_context(&cs->csc2); FREE(cs); }
static void radeon_drm_cs_destroy(struct radeon_cmdbuf *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); radeon_drm_cs_sync_flush(rcs); util_queue_fence_destroy(&cs->flush_completed); radeon_cs_context_cleanup(&cs->csc1); radeon_cs_context_cleanup(&cs->csc2); p_atomic_dec(&cs->ws->num_cs); radeon_destroy_cs_context(&cs->csc1); radeon_destroy_cs_context(&cs->csc2); radeon_fence_reference(&cs->next_fence, NULL); FREE(cs); }
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; radeon_drm_cs_sync_flush(cs); /* Flip command streams. */ tmp = cs->csc; cs->csc = cs->cst; cs->cst = tmp; /* If the CS is not empty, emit it in a separate thread. */ if (cs->base.cdw) { unsigned i, crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.cdw; for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { cs->cst->cs.num_chunks = 3; cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS; } else { cs->cst->cs.num_chunks = 2; } if (cs->thread && (flags & RADEON_FLUSH_ASYNC)) { cs->flush_started = 1; pipe_semaphore_signal(&cs->flush_queued); } else { radeon_drm_cs_emit_ioctl_oneshot(cs->cst); } } else { radeon_cs_context_cleanup(cs->cst); } /* Prepare a new CS. */ cs->base.buf = cs->csc->buf; cs->base.cdw = 0; }
static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); radeon_drm_cs_sync_flush(cs); if (cs->thread) { cs->kill_thread = 1; pipe_semaphore_signal(&cs->flush_queued); pipe_semaphore_wait(&cs->flush_completed); pipe_thread_wait(cs->thread); } pipe_semaphore_destroy(&cs->flush_queued); pipe_semaphore_destroy(&cs->flush_completed); radeon_cs_context_cleanup(&cs->csc1); radeon_cs_context_cleanup(&cs->csc2); p_atomic_dec(&cs->ws->num_cs); radeon_destroy_cs_context(&cs->csc1); radeon_destroy_cs_context(&cs->csc2); FREE(cs); }
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; switch (cs->base.ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ if (cs->ws->info.chip_class <= SI) { while (rcs->cdw & 7) OUT_CS(&cs->base, 0xf0000000); /* NOP packet */ } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x00000000); /* NOP packet */ } break; case RING_GFX: /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements * r6xx, requires at least 4 dw alignment to avoid a hw bug. */ if (flags & RADEON_FLUSH_COMPUTE) { if (cs->ws->info.chip_class <= SI) { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ } } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ } break; } if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) { fprintf(stderr, "radeon: command stream overflowed\n"); } radeon_drm_cs_sync_flush(rcs); /* Flip command streams. */ tmp = cs->csc; cs->csc = cs->cst; cs->cst = tmp; cs->cst->cs_trace_id = cs_trace_id; /* If the CS is not empty or overflowed, emit it in a separate thread. */ if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) { unsigned i, crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.cdw; for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } switch (cs->base.ring_type) { case RING_DMA: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; } break; case RING_UVD: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_UVD; cs->cst->cs.num_chunks = 3; break; default: case RING_GFX: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_GFX; cs->cst->cs.num_chunks = 2; if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; cs->cst->cs.num_chunks = 3; } if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_END_OF_FRAME) { cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_COMPUTE) { cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; cs->cst->cs.num_chunks = 3; } break; } if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) { cs->flush_started = 1; radeon_drm_ws_queue_cs(cs->ws, cs); } else { pipe_mutex_lock(cs->ws->cs_stack_lock); if (cs->ws->thread) { while (p_atomic_read(&cs->ws->ncs)) { pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock); } } pipe_mutex_unlock(cs->ws->cs_stack_lock); radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst); } } else { radeon_cs_context_cleanup(cs->cst); } /* Prepare a new CS. */ cs->base.buf = cs->csc->buf; cs->base.cdw = 0; }
static void *radeon_bo_map_internal(struct pb_buffer *_buf, unsigned flags, void *flush_ctx) { struct radeon_bo *bo = radeon_bo(_buf); struct radeon_drm_cs *cs = flush_ctx; struct drm_radeon_gem_mmap args = {}; void *ptr; /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */ if (!(flags & PB_USAGE_UNSYNCHRONIZED)) { /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */ if (flags & PB_USAGE_DONTBLOCK) { if (!(flags & PB_USAGE_CPU_WRITE)) { /* Mapping for read. * * Since we are mapping for read, we don't need to wait * if the GPU is using the buffer for read too * (neither one is changing it). * * Only check whether the buffer is being used for write. */ if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); return NULL; } if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_WRITE)) { return NULL; } } else { if (radeon_bo_is_referenced_by_cs(cs, bo)) { cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); return NULL; } if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) { return NULL; } } } else { if (!(flags & PB_USAGE_CPU_WRITE)) { /* Mapping for read. * * Since we are mapping for read, we don't need to wait * if the GPU is using the buffer for read too * (neither one is changing it). * * Only check whether the buffer is being used for write. */ if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { cs->flush_cs(cs->flush_data, 0); } radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_WRITE); } else { /* Mapping for write. */ if (radeon_bo_is_referenced_by_cs(cs, bo)) { cs->flush_cs(cs->flush_data, 0); } else { /* Try to avoid busy-waiting in radeon_bo_wait. */ if (p_atomic_read(&bo->num_active_ioctls)) radeon_drm_cs_sync_flush(cs); } radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE); } } } /* Return the pointer if it's already mapped. */ if (bo->ptr) return bo->ptr; /* Map the buffer. */ pipe_mutex_lock(bo->map_mutex); /* Return the pointer if it's already mapped (in case of a race). */ if (bo->ptr) { pipe_mutex_unlock(bo->map_mutex); return bo->ptr; } args.handle = bo->handle; args.offset = 0; args.size = (uint64_t)bo->size; if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_MMAP, &args, sizeof(args))) { pipe_mutex_unlock(bo->map_mutex); fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n", bo, bo->handle); return NULL; } ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, bo->rws->fd, args.addr_ptr); if (ptr == MAP_FAILED) { pipe_mutex_unlock(bo->map_mutex); fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno); return NULL; } bo->ptr = ptr; pipe_mutex_unlock(bo->map_mutex); return bo->ptr; }
static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, unsigned flags, struct pipe_fence_handle **pfence) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; switch (cs->ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ if (cs->ws->info.chip_class <= GFX6) { while (rcs->current.cdw & 7) radeon_emit(&cs->base, 0xf0000000); /* NOP packet */ } else { while (rcs->current.cdw & 7) radeon_emit(&cs->base, 0x00000000); /* NOP packet */ } break; case RING_GFX: /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements * r6xx, requires at least 4 dw alignment to avoid a hw bug. */ if (cs->ws->info.gfx_ib_pad_with_type2) { while (rcs->current.cdw & 7) radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */ } else { while (rcs->current.cdw & 7) radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */ } break; case RING_UVD: while (rcs->current.cdw & 15) radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */ break; default: break; } if (rcs->current.cdw > rcs->current.max_dw) { fprintf(stderr, "radeon: command stream overflowed\n"); } if (pfence || cs->csc->num_slab_buffers) { struct pipe_fence_handle *fence; if (cs->next_fence) { fence = cs->next_fence; cs->next_fence = NULL; } else { fence = radeon_cs_create_fence(rcs); } if (fence) { if (pfence) radeon_fence_reference(pfence, fence); mtx_lock(&cs->ws->bo_fence_lock); for (unsigned i = 0; i < cs->csc->num_slab_buffers; ++i) { struct radeon_bo *bo = cs->csc->slab_buffers[i].bo; p_atomic_inc(&bo->num_active_ioctls); radeon_bo_slab_fence(bo, (struct radeon_bo *)fence); } mtx_unlock(&cs->ws->bo_fence_lock); radeon_fence_reference(&fence, NULL); } } else { radeon_fence_reference(&cs->next_fence, NULL); } radeon_drm_cs_sync_flush(rcs); /* Swap command streams. */ tmp = cs->csc; cs->csc = cs->cst; cs->cst = tmp; /* If the CS is not empty or overflowed, emit it in a separate thread. */ if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) { unsigned i, num_relocs; num_relocs = cs->cst->num_relocs; cs->cst->chunks[0].length_dw = cs->base.current.cdw; for (i = 0; i < num_relocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls); } switch (cs->ring_type) { case RING_DMA: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; if (cs->ws->info.r600_has_virtual_memory) { cs->cst->flags[0] |= RADEON_CS_USE_VM; } break; case RING_UVD: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_UVD; cs->cst->cs.num_chunks = 3; break; case RING_VCE: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_VCE; cs->cst->cs.num_chunks = 3; break; default: case RING_GFX: case RING_COMPUTE: cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS; cs->cst->flags[1] = RADEON_CS_RING_GFX; cs->cst->cs.num_chunks = 3; if (cs->ws->info.r600_has_virtual_memory) { cs->cst->flags[0] |= RADEON_CS_USE_VM; cs->cst->cs.num_chunks = 3; } if (flags & PIPE_FLUSH_END_OF_FRAME) { cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; cs->cst->cs.num_chunks = 3; } if (cs->ring_type == RING_COMPUTE) { cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; cs->cst->cs.num_chunks = 3; } break; } if (util_queue_is_initialized(&cs->ws->cs_queue)) { util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed, radeon_drm_cs_emit_ioctl_oneshot, NULL); if (!(flags & PIPE_FLUSH_ASYNC)) radeon_drm_cs_sync_flush(rcs); } else { radeon_drm_cs_emit_ioctl_oneshot(cs, 0); } } else { radeon_cs_context_cleanup(cs->cst); } /* Prepare a new CS. */ cs->base.current.buf = cs->csc->buf; cs->base.current.cdw = 0; cs->base.used_vram = 0; cs->base.used_gart = 0; if (cs->ring_type == RING_GFX) cs->ws->num_gfx_IBs++; else if (cs->ring_type == RING_DMA) cs->ws->num_sdma_IBs++; return 0; }
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, struct pipe_fence_handle **fence, uint32_t cs_trace_id) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_cs_context *tmp; switch (cs->base.ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ if (cs->ws->info.chip_class <= SI) { while (rcs->cdw & 7) OUT_CS(&cs->base, 0xf0000000); /* NOP packet */ } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x00000000); /* NOP packet */ } break; case RING_GFX: /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements * r6xx, requires at least 4 dw alignment to avoid a hw bug. * hawaii with old firmware needs type2 nop packet. * accel_working2 with value 3 indicates the new firmware. */ if (cs->ws->info.chip_class <= SI || (cs->ws->info.family == CHIP_HAWAII && cs->ws->accel_working2 < 3)) { while (rcs->cdw & 7) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ } else { while (rcs->cdw & 7) OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ } break; case RING_UVD: while (rcs->cdw & 15) OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ break; default: break; } if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) { fprintf(stderr, "radeon: command stream overflowed\n"); } if (fence) { radeon_fence_reference(fence, NULL); *fence = radeon_cs_create_fence(rcs); } radeon_drm_cs_sync_flush(rcs); /* Swap command streams. */ tmp = cs->csc; cs->csc = cs->cst; cs->cst = tmp; cs->cst->cs_trace_id = cs_trace_id; /* If the CS is not empty or overflowed, emit it in a separate thread. */ if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) { unsigned i, crelocs; crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.cdw; for (i = 0; i < crelocs; i++) { /* Update the number of active asynchronous CS ioctls for the buffer. */ p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } switch (cs->base.ring_type) { case RING_DMA: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; } break; case RING_UVD: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_UVD; cs->cst->cs.num_chunks = 3; break; case RING_VCE: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_VCE; cs->cst->cs.num_chunks = 3; break; default: case RING_GFX: cs->cst->flags[0] = 0; cs->cst->flags[1] = RADEON_CS_RING_GFX; cs->cst->cs.num_chunks = 2; if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; cs->cst->cs.num_chunks = 3; } if (cs->ws->info.r600_virtual_address) { cs->cst->flags[0] |= RADEON_CS_USE_VM; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_END_OF_FRAME) { cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; cs->cst->cs.num_chunks = 3; } if (flags & RADEON_FLUSH_COMPUTE) { cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; cs->cst->cs.num_chunks = 3; } break; } if (cs->ws->thread) { pipe_semaphore_wait(&cs->flush_completed); radeon_drm_ws_queue_cs(cs->ws, cs); if (!(flags & RADEON_FLUSH_ASYNC)) radeon_drm_cs_sync_flush(rcs); } else { radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst); } } else { radeon_cs_context_cleanup(cs->cst); } /* Prepare a new CS. */ cs->base.buf = cs->csc->buf; cs->base.cdw = 0; cs->ws->num_cs_flushes++; }