/* ABI call handler: the guest has asked to wait for ND-Range completion.
 * If no work-groups remain, reset the driver state immediately; otherwise
 * suspend the calling x86 context until the simulation finishes. */
static int opencl_abi_si_ndrange_finish_impl(X86Context *ctx)
{
	int running_count;
	int waiting_count;

	/* Record that the driver is now waiting for the ND-Range to finish */
	driver_state.wait_for_ndrange_completion = 1;

	running_count = list_count(si_emu->running_work_groups);
	waiting_count = list_count(si_emu->waiting_work_groups);

	if (running_count || waiting_count)
	{
		/* Work-groups remain: suspend the x86 context until the
		 * simulation signals completion. */
		opencl_debug("\tsuspending driver thread\n");
		X86ContextSuspend(ctx, opencl_abi_si_ndrange_finish_can_wakeup, 
			NULL, opencl_abi_si_ndrange_finish_wakeup, NULL);
		return 0;
	}

	/* Nothing left to run: release the ND-Range and reset driver state */
	opencl_debug("\tndrange is complete\n");
	si_ndrange_free(driver_state.ndrange);
	driver_state.ndrange = NULL;
	driver_state.kernel = NULL;
	driver_state.wait_for_ndrange_completion = 0;
	driver_state.ndrange_complete = 0;
	driver_state.ready_for_work = 0;
	si_emu->ndrange = NULL;

	return 0;
}
Beispiel #2
0
/* Finalize the Southern Islands GPU emulator, releasing every resource it
 * owns: report file, ND-Ranges, OpenCL objects, disassembler, ISA state,
 * global memory, timer, and finally the emulator object itself. */
void si_emu_done()
{
	/* Close the GPU report file, if one was opened */
	if (si_emu_report_file != NULL)
		fclose(si_emu_report_file);

	/* Release every remaining ND-Range; freeing the list head removes it
	 * from the list and decrements the counter, advancing the loop. */
	while (si_emu->ndrange_list_count > 0)
		si_ndrange_free(si_emu->ndrange_list_head);

	/* Release all OpenCL objects, then the repository that held them */
	si_opencl_repo_free_all_objects(si_emu->opencl_repo);
	si_opencl_repo_free(si_emu->opencl_repo);

	/* Shut down the disassembler module */
	si_disasm_done();

	/* Shut down the ISA module */
	si_isa_done();

	/* Release the emulator's own resources last */
	mem_free(si_emu->global_mem);
	m2s_timer_free(si_emu->timer);
	free(si_emu);
}
/* Wakeup callback for the OpenGL variant of the ND-Range-finish ABI call.
 * Invoked when the suspended x86 context resumes; releases the finished
 * ND-Range and clears all driver state. */
static void opengl_abi_si_ndrange_finish_wakeup(X86Context *ctx, 
	void *user_data)
{
	/* This callback is registered with no user data */
	assert(!user_data);

	/* Release the finished ND-Range */
	si_ndrange_free(driver_state.ndrange);

	/* Clear driver state (independent fields, order irrelevant) */
	driver_state.ready_for_work = 0;
	driver_state.ndrange_complete = 0;
	driver_state.wait_for_ndrange_completion = 0;
	driver_state.shader = NULL;
	driver_state.ndrange = NULL;

	/* The emulator no longer has a current ND-Range */
	si_emu->ndrange = NULL;
}
/* Wakeup callback for the OpenCL ND-Range-finish ABI call. Invoked when
 * the suspended x86 context resumes; releases the finished ND-Range and
 * clears all driver state. */
static void opencl_abi_si_ndrange_finish_wakeup(X86Context *ctx, 
	void *user_data)
{
	/* This callback is registered with no user data */
	assert(!user_data);

	opencl_debug("waking up after finish");

	/* Release the finished ND-Range */
	si_ndrange_free(driver_state.ndrange);

	/* Clear driver state (independent fields, order irrelevant) */
	driver_state.ready_for_work = 0;
	driver_state.ndrange_complete = 0;
	driver_state.wait_for_ndrange_completion = 0;
	driver_state.kernel = NULL;
	driver_state.ndrange = NULL;

	/* The emulator no longer has a current ND-Range */
	si_emu->ndrange = NULL;
}
Beispiel #5
0
/* Run one iteration of the Southern Islands GPU emulation loop.
 * Return FALSE if there is no more emulation to perform.
 * One iteration: promote pending ND-Ranges to running, execute one
 * instruction per running wavefront, then free finished ND-Ranges. */
int si_emu_run(void)
{
	struct si_ndrange_t *ndrange;
	struct si_ndrange_t *ndrange_next;

	struct si_work_group_t *work_group;
	struct si_work_group_t *work_group_next;

	struct si_wavefront_t *wavefront;
	struct si_wavefront_t *wavefront_next;

	/* For efficiency when no Southern Islands emulation is selected, exit here
	 * if the list of existing ND-Ranges is empty. */
	if (!si_emu->ndrange_list_count)
		return 0;

	/* Start any ND-Range in state 'pending' */
	while ((ndrange = si_emu->pending_ndrange_list_head))
	{
		/* Set all ready work-groups to running.
		 * NOTE(review): this loop terminates only if
		 * si_work_group_clear_status() removes the work-group from
		 * the ND-Range's pending list — confirm in its definition. */
		while ((work_group = ndrange->pending_list_head))
		{
			si_work_group_clear_status(work_group, si_work_group_pending);
			si_work_group_set_status(work_group, si_work_group_running);
		}

		/* Move the ND-Range to state 'running'; presumably this also
		 * extracts it from the pending list, ending the outer loop. */
		si_ndrange_clear_status(ndrange, si_ndrange_pending);
		si_ndrange_set_status(ndrange, si_ndrange_running);
	}

	/* Run one instruction of each wavefront in each work-group of each
	 * ND-Range that is in status 'running'. */
	for (ndrange = si_emu->running_ndrange_list_head; ndrange; ndrange = ndrange_next)
	{
		/* Save next ND-Range in state 'running'. This is done because the state
		 * might change during the execution of the ND-Range. */
		ndrange_next = ndrange->running_ndrange_list_next;

		/* Execute an instruction from each work-group */
		for (work_group = ndrange->running_list_head; work_group; work_group = work_group_next)
		{
			/* Save next running work-group before execution, since
			 * executing an instruction may unlink this one. */
			work_group_next = work_group->running_list_next;

			/* Run an instruction from each wavefront */
			for (wavefront = work_group->running_list_head; wavefront; wavefront = wavefront_next)
			{
				/* Save next running wavefront for the same reason */
				wavefront_next = wavefront->running_list_next;

				/* Execute one instruction in this wavefront */
				si_wavefront_execute(wavefront);
			}
		}
	}

	/* Free ND-Ranges that finished.
	 * NOTE(review): si_ndrange_free() must unlink the ND-Range from the
	 * finished list for this loop to terminate — confirm. */
	while ((ndrange = si_emu->finished_ndrange_list_head))
	{
		/* Dump ND-Range report */
		si_ndrange_dump(ndrange, si_emu_report_file);

		/* Request simulation stop if maximum number of kernels reached */
		if (si_emu_max_kernels && si_emu->ndrange_count >= si_emu_max_kernels)
			esim_finish = esim_finish_si_max_kernels;

		/* Extract from list of finished ND-Ranges and free */
		si_ndrange_free(ndrange);
	}

	/* Return TRUE: emulation was performed this iteration */
	return 1;
}
Beispiel #6
0
/* Run one iteration of the Southern Islands GPU timing simulation loop.
 * Return 0 when no ND-Ranges exist (nothing to simulate), 1 otherwise.
 * Each call advances the GPU by one cycle: map a pending ND-Range if
 * needed, dispatch work-groups to ready compute units, run each busy
 * compute unit for one cycle, and free the ND-Range once all compute
 * units drain. */
int si_gpu_run(void)
{
    struct si_ndrange_t *ndrange;

    struct si_compute_unit_t *compute_unit;
    struct si_compute_unit_t *compute_unit_next;

    /* For efficiency when no Southern Islands emulation is selected, exit here
     * if the list of existing ND-Ranges is empty. */
    if (!si_emu->ndrange_list_count)
        return 0;

    /* Start one ND-Range in state 'pending'.
     * NOTE(review): loop termination relies on si_ndrange_clear_status()
     * removing the ND-Range from the pending list — confirm. */
    while ((ndrange = si_emu->pending_ndrange_list_head))
    {
        /* Currently not supported for more than 1 ND-Range */
        if (si_gpu->ndrange)
            fatal("%s: Southern Islands GPU timing simulation not supported "
                  "for multiple ND-Ranges", __FUNCTION__);

        /* Set ND-Range status to 'running' */
        si_ndrange_clear_status(ndrange, si_ndrange_pending);
        si_ndrange_set_status(ndrange, si_ndrange_running);

        /* Trace */
        si_trace("si.new_ndrange id=%d wg_first=%d wg_count=%d\n", ndrange->id,
                 ndrange->work_group_id_first, ndrange->work_group_count);

        /* Map ND-Range to GPU (presumably sets si_gpu->ndrange, which the
         * assertion below depends on — confirm in si_gpu_map_ndrange) */
        si_gpu_map_ndrange(ndrange);
        si_calc_plot();
    }

    /* Mapped ND-Range */
    ndrange = si_gpu->ndrange;
    assert(ndrange);

    /* Allocate pending work-groups to ready compute units; each mapping
     * call presumably pops from both lists, so this loop terminates when
     * either list empties. */
    while (si_gpu->compute_unit_ready_list_head && ndrange->pending_list_head)
        si_compute_unit_map_work_group(si_gpu->compute_unit_ready_list_head,
                                       ndrange->pending_list_head);

    /* One more cycle */
    si_gpu->cycle++;

    /* Stop if maximum number of GPU cycles exceeded */
    if (si_emu_max_cycles && si_gpu->cycle >= si_emu_max_cycles)
        esim_finish = esim_finish_si_max_cycles;

    /* Stop if maximum number of GPU instructions exceeded */
    if (si_emu_max_inst && si_emu->inst_count >= si_emu_max_inst)
        esim_finish = esim_finish_si_max_inst;

    /* Stop if any reason met; cleanup below is skipped in that case */
    if (esim_finish)
        return 1;

    /* Free instructions in trash */
    si_gpu_uop_trash_empty();

    /* Run one loop iteration on each busy compute unit */
    for (compute_unit = si_gpu->compute_unit_busy_list_head; compute_unit;
            compute_unit = compute_unit_next)
    {
        /* Store next busy compute unit, since this can change
         * during the compute unit simulation loop iteration. */
        compute_unit_next = compute_unit->compute_unit_busy_list_next;

        /* Run one cycle */
        si_compute_unit_run(compute_unit);
    }

    /* If ND-Range finished execution in all compute units, free it. */
    if (!si_gpu->compute_unit_busy_list_count)
    {
        /* Dump ND-Range report */
        si_ndrange_dump(ndrange, si_emu_report_file);

        /* Request stop if maximum number of kernels reached */
        if (si_emu_max_kernels && si_emu->ndrange_count >= si_emu_max_kernels)
            esim_finish = esim_finish_si_max_kernels;

        /* Finalize and free ND-Range: flush the uop trash and unmap from
         * the GPU before freeing, so no stale references remain. */
        assert(si_ndrange_get_status(ndrange, si_ndrange_finished));
        si_gpu_uop_trash_empty();
        si_gpu_unmap_ndrange();
        si_ndrange_free(ndrange);
    }

    /* Return true: simulation was performed this cycle */
    return 1;
}