/* ABI call: the guest asked to wait for ND-Range completion. If work-groups
 * are still running or waiting, suspend the calling x86 context until the
 * emulation drains them; otherwise tear down the driver state immediately.
 * Always returns 0. */
static int opencl_abi_si_ndrange_finish_impl(X86Context *ctx)
{
	driver_state.wait_for_ndrange_completion = 1;

	/* Work still in flight: block the caller and let the wakeup
	 * callback perform the cleanup later. */
	if (list_count(si_emu->running_work_groups) ||
			list_count(si_emu->waiting_work_groups))
	{
		opencl_debug("\tsuspending driver thread\n");

		/* Suspend x86 context until simulation completes */
		X86ContextSuspend(ctx, opencl_abi_si_ndrange_finish_can_wakeup,
				NULL, opencl_abi_si_ndrange_finish_wakeup, NULL);
		return 0;
	}

	/* Nothing left to run: reset driver state right away */
	opencl_debug("\tndrange is complete\n");
	si_ndrange_free(driver_state.ndrange);
	driver_state.ndrange = NULL;
	driver_state.kernel = NULL;
	driver_state.wait_for_ndrange_completion = 0;
	driver_state.ndrange_complete = 0;
	driver_state.ready_for_work = 0;
	si_emu->ndrange = NULL;

	return 0;
}
/* Finalize GPU kernel */ void si_emu_done() { /* GPU report */ if (si_emu_report_file) fclose(si_emu_report_file); /* Free ND-Ranges */ while (si_emu->ndrange_list_count) si_ndrange_free(si_emu->ndrange_list_head); /* Free OpenCL objects */ si_opencl_repo_free_all_objects(si_emu->opencl_repo); si_opencl_repo_free(si_emu->opencl_repo); /* Finalize disassembler */ si_disasm_done(); /* Finalize ISA */ si_isa_done(); /* Finalize GPU kernel */ mem_free(si_emu->global_mem); m2s_timer_free(si_emu->timer); free(si_emu); }
/* Wakeup callback for the OpenGL driver thread once its ND-Range has
 * completed: release the ND-Range and clear all per-ND-Range driver state.
 * 'user_data' is unused and must be NULL. */
static void opengl_abi_si_ndrange_finish_wakeup(X86Context *ctx,
		void *user_data)
{
	/* This callback is always registered with a NULL payload */
	assert(!user_data);

	/* Release the finished ND-Range and detach it from the emulator */
	si_ndrange_free(driver_state.ndrange);
	si_emu->ndrange = NULL;
	driver_state.ndrange = NULL;
	driver_state.shader = NULL;

	/* Reset the completion handshake flags */
	driver_state.wait_for_ndrange_completion = 0;
	driver_state.ndrange_complete = 0;
	driver_state.ready_for_work = 0;
}
/* Wakeup callback for the OpenCL driver thread once its ND-Range has
 * completed: release the ND-Range and clear all per-ND-Range driver state.
 * 'user_data' is unused and must be NULL. */
static void opencl_abi_si_ndrange_finish_wakeup(X86Context *ctx,
		void *user_data)
{
	assert(!user_data);

	/* Fix: message previously lacked the '\t' prefix and trailing '\n'
	 * that every other opencl_debug message in this path uses, so it ran
	 * into the next line of the debug log. */
	opencl_debug("\twaking up after finish\n");

	/* Reset driver state */
	si_ndrange_free(driver_state.ndrange);
	driver_state.ndrange = NULL;
	driver_state.kernel = NULL;
	driver_state.wait_for_ndrange_completion = 0;
	driver_state.ndrange_complete = 0;
	driver_state.ready_for_work = 0;
	si_emu->ndrange = NULL;
}
/* Run one iteration of the Southern Islands GPU emulation loop:
 * promote pending ND-Ranges to running, execute one instruction per
 * running wavefront, then free finished ND-Ranges.
 * Return FALSE if there is no more emulation to perform. */
int si_emu_run(void)
{
	struct si_ndrange_t *ndrange;
	struct si_ndrange_t *ndrange_next;
	struct si_work_group_t *work_group;
	struct si_work_group_t *work_group_next;
	struct si_wavefront_t *wavefront;
	struct si_wavefront_t *wavefront_next;

	/* For efficiency when no Southern Islands emulation is selected, exit here
	 * if the list of existing ND-Ranges is empty. */
	if (!si_emu->ndrange_list_count)
		return 0;

	/* Start any ND-Range in state 'pending' */
	while ((ndrange = si_emu->pending_ndrange_list_head))
	{
		/* Set all ready work-groups to running.
		 * NOTE(review): this loop terminates only if
		 * si_work_group_set_status() unlinks the work-group from
		 * 'pending_list_head' as a side effect — verify in its
		 * implementation. */
		while ((work_group = ndrange->pending_list_head))
		{
			si_work_group_clear_status(work_group, si_work_group_pending);
			si_work_group_set_status(work_group, si_work_group_running);
		}

		/* Set is in state 'running' (the status change presumably
		 * removes the ND-Range from the pending list, advancing the
		 * outer loop). */
		si_ndrange_clear_status(ndrange, si_ndrange_pending);
		si_ndrange_set_status(ndrange, si_ndrange_running);
	}

	/* Run one instruction of each wavefront in each work-group of each
	 * ND-Range that is in status 'running'. */
	for (ndrange = si_emu->running_ndrange_list_head; ndrange;
			ndrange = ndrange_next)
	{
		/* Save next ND-Range in state 'running'. This is done because the state
		 * might change during the execution of the ND-Range. */
		ndrange_next = ndrange->running_ndrange_list_next;

		/* Execute an instruction from each work-group */
		for (work_group = ndrange->running_list_head; work_group;
				work_group = work_group_next)
		{
			/* Save next running work-group (same unlink-during-
			 * iteration hazard as above) */
			work_group_next = work_group->running_list_next;

			/* Run an instruction from each wavefront */
			for (wavefront = work_group->running_list_head; wavefront;
					wavefront = wavefront_next)
			{
				/* Save next running wavefront */
				wavefront_next = wavefront->running_list_next;

				/* Execute instruction in wavefront; this may move the
				 * wavefront (and its work-group/ND-Range) between
				 * state lists. */
				si_wavefront_execute(wavefront);
			}
		}
	}

	/* Free ND-Ranges that finished */
	while ((ndrange = si_emu->finished_ndrange_list_head))
	{
		/* Dump ND-Range report */
		si_ndrange_dump(ndrange, si_emu_report_file);

		/* Stop if maximum number of kernels reached */
		if (si_emu_max_kernels && si_emu->ndrange_count >= si_emu_max_kernels)
			esim_finish = esim_finish_si_max_kernels;

		/* Extract from list of finished ND-Ranges and free.
		 * NOTE(review): si_ndrange_free() must unlink the ND-Range from
		 * 'finished_ndrange_list_head' or this loop never terminates. */
		si_ndrange_free(ndrange);
	}

	/* Return TRUE */
	return 1;
}
/* Run one iteration of the Southern Islands GPU timing simulation loop:
 * map one pending ND-Range to the GPU, schedule work-groups onto ready
 * compute units, advance one cycle on every busy compute unit, and free
 * the ND-Range once all compute units drain.
 * Returns 0 when there is nothing to simulate, 1 otherwise. */
int si_gpu_run(void)
{
	struct si_ndrange_t *ndrange;
	struct si_compute_unit_t *compute_unit;
	struct si_compute_unit_t *compute_unit_next;

	/* For efficiency when no Southern Islands emulation is selected, exit here
	 * if the list of existing ND-Ranges is empty. */
	if (!si_emu->ndrange_list_count)
		return 0;

	/* Start one ND-Range in state 'pending' */
	while ((ndrange = si_emu->pending_ndrange_list_head))
	{
		/* Currently not supported for more than 1 ND-Range */
		if (si_gpu->ndrange)
			fatal("%s: Southern Islands GPU timing simulation not supported "
				"for multiple ND-Ranges", __FUNCTION__);

		/* Set ND-Range status to 'running'. NOTE(review): the status
		 * change presumably unlinks the ND-Range from the pending list,
		 * which is what makes this loop terminate — verify. */
		si_ndrange_clear_status(ndrange, si_ndrange_pending);
		si_ndrange_set_status(ndrange, si_ndrange_running);

		/* Trace */
		si_trace("si.new_ndrange id=%d wg_first=%d wg_count=%d\n",
			ndrange->id, ndrange->work_group_id_first,
			ndrange->work_group_count);

		/* Map ND-Range to GPU */
		si_gpu_map_ndrange(ndrange);
		si_calc_plot();
	}

	/* Mapped ND-Range (guaranteed non-NULL here because the list-count
	 * guard above ensured at least one ND-Range exists) */
	ndrange = si_gpu->ndrange;
	assert(ndrange);

	/* Allocate work-groups to compute units; each mapping call must pop
	 * both list heads for this loop to make progress. */
	while (si_gpu->compute_unit_ready_list_head && ndrange->pending_list_head)
		si_compute_unit_map_work_group(si_gpu->compute_unit_ready_list_head,
			ndrange->pending_list_head);

	/* One more cycle */
	si_gpu->cycle++;

	/* Stop if maximum number of GPU cycles exceeded */
	if (si_emu_max_cycles && si_gpu->cycle >= si_emu_max_cycles)
		esim_finish = esim_finish_si_max_cycles;

	/* Stop if maximum number of GPU instructions exceeded */
	if (si_emu_max_inst && si_emu->inst_count >= si_emu_max_inst)
		esim_finish = esim_finish_si_max_inst;

	/* Stop if any reason met */
	if (esim_finish)
		return 1;

	/* Free instructions in trash */
	si_gpu_uop_trash_empty();

	/* Run one loop iteration on each busy compute unit */
	for (compute_unit = si_gpu->compute_unit_busy_list_head; compute_unit;
			compute_unit = compute_unit_next)
	{
		/* Store next busy compute unit, since this can change
		 * during the compute unit simulation loop iteration. */
		compute_unit_next = compute_unit->compute_unit_busy_list_next;

		/* Run one cycle */
		si_compute_unit_run(compute_unit);
	}

	/* If ND-Range finished execution in all compute units, free it. */
	if (!si_gpu->compute_unit_busy_list_count)
	{
		/* Dump ND-Range report */
		si_ndrange_dump(ndrange, si_emu_report_file);

		/* Stop if maximum number of kernels reached */
		if (si_emu_max_kernels && si_emu->ndrange_count >= si_emu_max_kernels)
			esim_finish = esim_finish_si_max_kernels;

		/* Finalize and free ND-Range; the order (empty uop trash, unmap,
		 * then free) looks deliberate — do not reorder. */
		assert(si_ndrange_get_status(ndrange, si_ndrange_finished));
		si_gpu_uop_trash_empty();
		si_gpu_unmap_ndrange();
		si_ndrange_free(ndrange);
	}

	/* Return true */
	return 1;
}