static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) { struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)param; struct radeon_drm_cs *cs; unsigned i; while (1) { pipe_semaphore_wait(&ws->cs_queued); if (ws->kill_thread) break; pipe_mutex_lock(ws->cs_stack_lock); cs = ws->cs_stack[0]; for (i = 1; i < ws->ncs; i++) ws->cs_stack[i - 1] = ws->cs_stack[i]; ws->cs_stack[--ws->ncs] = NULL; pipe_mutex_unlock(ws->cs_stack_lock); if (cs) { radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst); pipe_semaphore_signal(&cs->flush_completed); } } pipe_mutex_lock(ws->cs_stack_lock); for (i = 0; i < ws->ncs; i++) { pipe_semaphore_signal(&ws->cs_stack[i]->flush_completed); ws->cs_stack[i] = NULL; } ws->ncs = 0; pipe_mutex_unlock(ws->cs_stack_lock); return 0; }
/** * This is the thread's main entrypoint. * It's a simple loop: * 1. wait for work * 2. do work * 3. signal that we're done */ static PIPE_THREAD_ROUTINE( thread_function, init_data ) { struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; struct lp_rasterizer *rast = task->rast; boolean debug = false; unsigned fpstate = util_fpstate_get(); /* Make sure that denorms are treated like zeros. This is * the behavior required by D3D10. OpenGL doesn't care. */ util_fpstate_set_denorms_to_zero(fpstate); while (1) { /* wait for work */ if (debug) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); if (rast->exit_flag) break; if (task->thread_index == 0) { /* thread[0]: * - get next scene to rasterize * - map the framebuffer surfaces */ lp_rast_begin( rast, lp_scene_dequeue( rast->full_scenes, TRUE ) ); } /* Wait for all threads to get here so that threads[1+] don't * get a null rast->curr_scene pointer. */ pipe_barrier_wait( &rast->barrier ); /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); rasterize_scene(task, rast->curr_scene); /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); /* XXX: shouldn't be necessary: */ if (task->thread_index == 0) { lp_rast_end( rast ); } /* signal done with work */ if (debug) debug_printf("thread %d done working\n", task->thread_index); pipe_semaphore_signal(&task->work_done); } return 0; }
/**
 * Wait on a queue fence.
 *
 * fence->done is acquired and released again right away, so once this
 * returns the semaphore is back in the state that let the wait succeed.
 *
 * \param fence  fence whose "done" semaphore is waited on
 */
void
util_queue_job_wait(struct util_queue_fence *fence)
{
   /* Blocks until the semaphore is available, then marks it "busy". */
   pipe_semaphore_wait(&fence->done);

   /* Give it straight back: the fence is "idle" again. */
   pipe_semaphore_signal(&fence->done);
}
/**
 * Wait for a pending asynchronous flush of \p cs, if any.
 *
 * Does nothing unless the CS has a worker thread and a flush has been
 * started; otherwise blocks on flush_completed and clears flush_started.
 */
void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs)
{
    /* No worker thread or no flush in flight: nothing to wait for. */
    if (!cs->thread || !cs->flush_started) {
        return;
    }

    /* Wait for the pending ioctl to complete. */
    pipe_semaphore_wait(&cs->flush_completed);
    cs->flush_started = 0;
}
/**
 * Append a job to the queue and wake the worker.
 *
 * Blocks first on the fence (until it can be marked "busy") and then on
 * queue->has_space (until a slot is available).
 *
 * \param queue  queue receiving the job
 * \param job    opaque job pointer stored in the slot
 * \param fence  fence stored next to the job
 */
void
util_queue_add_job(struct util_queue *queue,
                   void *job,
                   struct util_queue_fence *fence)
{
   /* Mark the fence "busy". */
   pipe_semaphore_wait(&fence->done);

   /* A full queue makes us wait here until a slot frees up. */
   pipe_semaphore_wait(&queue->has_space);

   pipe_mutex_lock(queue->lock);
   assert(queue->num_jobs < ARRAY_SIZE(queue->jobs));
   queue->jobs[queue->num_jobs].job = job;
   queue->jobs[queue->num_jobs].fence = fence;
   ++queue->num_jobs;
   pipe_mutex_unlock(queue->lock);

   /* Tell the worker that one more job is queued. */
   pipe_semaphore_signal(&queue->queued);
}
/*
 * Make sure previous submissions of this cs are completed.
 *
 * Only has an effect when the winsys runs a submission thread.  In that
 * case flush_completed is acquired and released again immediately, so the
 * semaphore ends up in the same state it had once the wait succeeded.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Without a submission thread there is nothing to wait for. */
    if (!cs->ws->thread) {
        return;
    }

    /* Wait for any pending ioctl to complete, then hand the semaphore back. */
    pipe_semaphore_wait(&cs->flush_completed);
    pipe_semaphore_signal(&cs->flush_completed);
}
/** * This is the thread's main entrypoint. * It's a simple loop: * 1. wait for work * 2. do work * 3. signal that we're done */ static PIPE_THREAD_ROUTINE( thread_func, init_data ) { struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; struct lp_rasterizer *rast = task->rast; boolean debug = false; while (1) { /* wait for work */ if (debug) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); if (rast->exit_flag) break; if (task->thread_index == 0) { /* thread[0]: * - get next scene to rasterize * - map the framebuffer surfaces */ lp_rast_begin( rast, lp_scene_dequeue( rast->full_scenes, TRUE ) ); } /* Wait for all threads to get here so that threads[1+] don't * get a null rast->curr_scene pointer. */ pipe_barrier_wait( &rast->barrier ); /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); rasterize_scene(task, rast->curr_scene); /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); /* XXX: shouldn't be necessary: */ if (task->thread_index == 0) { lp_rast_end( rast ); } /* signal done with work */ if (debug) debug_printf("thread %d done working\n", task->thread_index); pipe_semaphore_signal(&task->work_done); } return NULL; }
/**
 * Per-CS worker thread performing the submission ioctl.
 *
 * Each wake-up on cs->flush_queued submits cs->cst and then posts
 * cs->flush_completed.  When cs->kill_thread is seen after a wake-up the
 * loop ends, and flush_completed is posted one final time before the
 * thread returns.
 *
 * \param param  the struct radeon_drm_cs served by this thread
 */
static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
{
    struct radeon_drm_cs *cs = (struct radeon_drm_cs*)param;

    for (;;) {
        pipe_semaphore_wait(&cs->flush_queued);
        if (cs->kill_thread) {
            break;
        }

        radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
        pipe_semaphore_signal(&cs->flush_completed);
    }

    /* Final post, issued after the kill request was observed. */
    pipe_semaphore_signal(&cs->flush_completed);
    return NULL;
}
/**
 * Wait until every rasterizer thread has posted its work_done semaphore.
 *
 * With no threads the loop body never runs and the call returns at once.
 */
void
lp_rast_finish( struct lp_rasterizer *rast )
{
   int t;

   /* wait for work to complete */
   for (t = 0; t < rast->num_threads; t++) {
      pipe_semaphore_wait(&rast->tasks[t].work_done);
   }
}
static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) { struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)param; struct radeon_drm_cs *cs; unsigned i, empty_stack; while (1) { pipe_semaphore_wait(&ws->cs_queued); if (ws->kill_thread) break; next: pipe_mutex_lock(ws->cs_stack_lock); cs = ws->cs_stack[0]; pipe_mutex_unlock(ws->cs_stack_lock); if (cs) { radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst); pipe_mutex_lock(ws->cs_stack_lock); for (i = 1; i < p_atomic_read(&ws->ncs); i++) { ws->cs_stack[i - 1] = ws->cs_stack[i]; } ws->cs_stack[p_atomic_read(&ws->ncs) - 1] = NULL; empty_stack = p_atomic_dec_zero(&ws->ncs); if (empty_stack) { pipe_condvar_signal(ws->cs_queue_empty); } pipe_mutex_unlock(ws->cs_stack_lock); pipe_semaphore_signal(&cs->flush_completed); if (!empty_stack) { goto next; } } } pipe_mutex_lock(ws->cs_stack_lock); for (i = 0; i < p_atomic_read(&ws->ncs); i++) { pipe_semaphore_signal(&ws->cs_stack[i]->flush_completed); ws->cs_stack[i] = NULL; } p_atomic_set(&ws->ncs, 0); pipe_condvar_signal(ws->cs_queue_empty); pipe_mutex_unlock(ws->cs_stack_lock); return NULL; }
/**
 * Destroy a command stream object.
 *
 * Waits for any flush in flight, stops the per-CS worker thread if one
 * exists, then tears down both semaphores and both CS contexts and frees
 * the object.
 */
static void
radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(cs);

    if (cs->thread) {
        /* Request termination, wake the worker so it notices the flag,
         * wait for its last flush_completed post and finally join it.
         */
        cs->kill_thread = 1;
        pipe_semaphore_signal(&cs->flush_queued);
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_thread_wait(cs->thread);
    }

    pipe_semaphore_destroy(&cs->flush_queued);
    pipe_semaphore_destroy(&cs->flush_completed);

    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);

    /* One CS fewer is registered with the winsys. */
    p_atomic_dec(&cs->ws->num_cs);

    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);

    FREE(cs);
}
/* Shutdown:
 *
 * Stops all rasterizer threads, releases the per-thread resources and
 * frees the rasterizer itself.
 */
void
lp_rast_destroy( struct lp_rasterizer *rast )
{
   unsigned t;

   /* Raise exit_flag, then post every thread's work_ready semaphore.
    * Each thread wakes up, sees the flag, leaves its main loop and exits.
    */
   rast->exit_flag = TRUE;
   for (t = 0; t < rast->num_threads; t++) {
      pipe_semaphore_signal(&rast->tasks[t].work_ready);
   }

   /* Per-thread data may only be cleaned up after the threads are gone.
    * pipe_thread_wait is deliberately not used here, to avoid a deadlock
    * on Windows per https://bugs.freedesktop.org/show_bug.cgi?id=76252
    */
   for (t = 0; t < rast->num_threads; t++) {
#ifdef _WIN32
      pipe_semaphore_wait(&rast->tasks[t].work_done);
#else
      thrd_join(rast->threads[t], NULL);
#endif
   }

   /* Release the per-thread semaphores. */
   for (t = 0; t < rast->num_threads; t++) {
      pipe_semaphore_destroy(&rast->tasks[t].work_ready);
      pipe_semaphore_destroy(&rast->tasks[t].work_done);
   }

   /* At least tasks[0] has a cache to free, even with zero threads. */
   for (t = 0; t < MAX2(1, rast->num_threads); t++) {
      align_free(rast->tasks[t].thread_data.cache);
   }

   /* The barrier synchronizing the rasterization threads is only torn
    * down when there are threads.
    */
   if (rast->num_threads > 0) {
      pipe_barrier_destroy( &rast->barrier );
   }

   lp_scene_queue_destroy(rast->full_scenes);

   FREE(rast);
}
static PIPE_THREAD_ROUTINE(util_queue_thread_func, param) { struct util_queue *queue = (struct util_queue*)param; unsigned i; while (1) { struct util_queue_job job; pipe_semaphore_wait(&queue->queued); if (queue->kill_thread) break; pipe_mutex_lock(queue->lock); job = queue->jobs[0]; for (i = 1; i < queue->num_jobs; i++) queue->jobs[i - 1] = queue->jobs[i]; queue->jobs[--queue->num_jobs].job = NULL; pipe_mutex_unlock(queue->lock); pipe_semaphore_signal(&queue->has_space); if (job.job) { queue->execute_job(job.job); pipe_semaphore_signal(&job.fence->done); } } /* signal remaining jobs before terminating */ pipe_mutex_lock(queue->lock); for (i = 0; i < queue->num_jobs; i++) { pipe_semaphore_signal(&queue->jobs[i].fence->done); queue->jobs[i].job = NULL; } queue->num_jobs = 0; pipe_mutex_unlock(queue->lock); return 0; }
/**
 * Flush a command stream: pad it, swap the two CS contexts and submit the
 * one that was just recorded.
 *
 * \param rcs          command stream to flush
 * \param flags        RADEON_FLUSH_* bits; this function tests
 *                     KEEP_TILING_FLAGS, END_OF_FRAME, COMPUTE and ASYNC
 * \param fence        if non-NULL, *fence is reset through
 *                     radeon_fence_reference(fence, NULL) and then set to a
 *                     fence created by radeon_cs_create_fence()
 * \param cs_trace_id  value stored in the cs_trace_id field of the context
 *                     being submitted
 */
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    /* Pad the command buffer with NOP packets up to the alignment used for
     * the ring type.
     */
    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
         * r6xx, requires at least 4 dw alignment to avoid a hw bug.
         * hawaii with old firmware needs type2 nop packet.
         * accel_working2 with value 3 indicates the new firmware.
         */
        if (cs->ws->info.chip_class <= SI ||
            (cs->ws->info.family == CHIP_HAWAII &&
             cs->ws->accel_working2 < 3)) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        /* pad UVD ring to 16 DWs */
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    /* An overflow is only reported here; the size check further down keeps
     * an overflowed CS from being submitted.
     */
    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    /* Wait for the previous submission before touching the contexts. */
    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams: after this, cs->cst is the context that was
     * just recorded and cs->csc is the one to record into next.
     */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is neither empty nor overflowed, and the noop debug option
     * is off, submit it; otherwise just clean up the context.
     */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        /* Select the ring in flags[1] and the option bits in flags[0].
         * num_chunks is 3 whenever a flag is set or the ring is not the
         * default GFX ring, and 2 otherwise.
         */
        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                /* Compute work overrides the GFX ring selection. */
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            /* Take flush_completed before queuing the CS for the submission
             * thread, which signals it again once the CS has been handled.
             * Unless an asynchronous flush was requested, wait for that
             * right away.
             */
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            /* No submission thread: submit from the calling thread. */
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}