Code example #1
File: gpu.c Project: ajdupree/cs316-m2s
void si_gpu_uop_trash_empty(void)
{
    struct si_uop_t *uop;

    while (si_gpu->trash_uop_list->count)
    {
        linked_list_head(si_gpu->trash_uop_list);
        uop = linked_list_get(si_gpu->trash_uop_list);
        linked_list_remove(si_gpu->trash_uop_list);

        si_trace("si.end_inst id=%lld cu=%d\n",
                 uop->id_in_compute_unit, uop->compute_unit->id);

        si_uop_free(uop);
    }
}
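The trash list drained here is the counterpart of si_gpu_uop_trash_add (used in code example #5): when tracing is enabled, completed uops are parked on the list instead of being freed immediately, so the tracer can still reference them during the current cycle, and si_gpu_run (code example #6) drains the list once per iteration. Below is a minimal, self-contained sketch of that deferred-free pattern; the names node_t, trash_add and trash_empty are hypothetical stand-ins, not the Multi2Sim API.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for si_uop_t */
typedef struct node_t
{
    long long id;
    struct node_t *next;
} node_t;

/* Singly linked trash list (stand-in for si_gpu->trash_uop_list) */
static node_t *trash_head;

/* Park a finished node instead of freeing it right away
 * (analogous to si_gpu_uop_trash_add) */
static void trash_add(node_t *node)
{
    node->next = trash_head;
    trash_head = node;
}

/* Drain the trash list once per simulated cycle
 * (analogous to si_gpu_uop_trash_empty) */
static void trash_empty(void)
{
    while (trash_head)
    {
        node_t *node = trash_head;
        trash_head = node->next;

        /* Trace the instruction before the node disappears */
        printf("end_inst id=%lld\n", node->id);

        free(node);
    }
}

int main(void)
{
    for (long long i = 0; i < 3; i++)
    {
        node_t *node = malloc(sizeof *node);
        node->id = i;
        trash_add(node);
    }
    trash_empty();
    return 0;
}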
Code example #2
void si_simd_complete(struct si_simd_t *simd)
{
	struct si_uop_t *uop;
	int list_entries;
	int list_index = 0;
	int i;

	list_entries = list_count(simd->exec_buffer);

	assert(list_entries <= si_gpu_simd_exec_buffer_size);

	for (i = 0; i < list_entries; i++)
	{
		uop = list_get(simd->exec_buffer, list_index);
		assert(uop);

		if (asTiming(si_gpu)->cycle < uop->execute_ready)
		{
			list_index++;
			continue;
		}

		/* Execution complete, remove the uop from the exec buffer */
		list_remove(simd->exec_buffer, uop);

		si_trace("si.end_inst id=%lld cu=%d\n", uop->id_in_compute_unit,
			uop->compute_unit->id);

		/* Free uop */
		si_uop_free(uop);

		/* Statistics */
		simd->inst_count++;
		si_gpu->last_complete_cycle = asTiming(si_gpu)->cycle;
	}
}
Code example #3
void si_simd_execute(struct si_simd_t *simd)
{
	struct si_uop_t *uop;
	int list_entries;
	int list_index = 0;
	int instructions_processed = 0;
	int i;

	list_entries = list_count(simd->decode_buffer);

	/* Sanity check the decode buffer */
	assert(list_entries <= si_gpu_simd_decode_buffer_size);

	for (i = 0; i < list_entries; i++)
	{
		uop = list_get(simd->decode_buffer, list_index);
		assert(uop);

		instructions_processed++;

		/* Uop is not ready yet */
		if (asTiming(si_gpu)->cycle < uop->decode_ready)
		{
			list_index++;
			continue;
		}

		/* Stall if the width has been reached */
		if (instructions_processed > si_gpu_simd_width)
		{
			si_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
				"stg=\"s\"\n", uop->id_in_compute_unit, 
				simd->compute_unit->id, uop->wavefront->id, 
				uop->id_in_wavefront);
			list_index++;
			continue;
		}

		/* Sanity check exec buffer */
		assert(list_count(simd->exec_buffer) <= 
			si_gpu_simd_exec_buffer_size);

		/* Stall if SIMD unit is full */
		if (list_count(simd->exec_buffer) == 
			si_gpu_simd_exec_buffer_size)
		{
			si_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
				"stg=\"s\"\n", uop->id_in_compute_unit, 
				simd->compute_unit->id, uop->wavefront->id, 
				uop->id_in_wavefront);
			list_index++;
			continue;
		}

		/* Includes time for pipelined read-exec-write of 
		 * all subwavefronts */
		uop->execute_ready = asTiming(si_gpu)->cycle + 
			si_gpu_simd_exec_latency;

		/* Transfer the uop to the outstanding execution buffer */
		list_remove(simd->decode_buffer, uop);
		list_enqueue(simd->exec_buffer, uop);

		uop->wavefront_pool_entry->ready_next_cycle = 1;

		si_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
			"stg=\"simd-e\"\n", uop->id_in_compute_unit, 
			simd->compute_unit->id, uop->wavefront->id, 
			uop->id_in_wavefront);
	}
}
Code example #4
void si_simd_decode(struct si_simd_t *simd)
{
	struct si_uop_t *uop;
	int instructions_processed = 0;
	int list_entries;
	int list_index = 0;
	int i;

	list_entries = list_count(simd->issue_buffer);

	/* Sanity check the issue buffer */
	assert(list_entries <= si_gpu_simd_issue_buffer_size);

	for (i = 0; i < list_entries; i++)
	{
		uop = list_get(simd->issue_buffer, list_index);
		assert(uop);

		instructions_processed++;

		/* Uop not ready yet */
		if (asTiming(si_gpu)->cycle < uop->issue_ready)
		{
			list_index++;
			continue;
		}

		/* Stall if the issue width has been reached. */
		if (instructions_processed > si_gpu_simd_width)
		{
			si_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
				"stg=\"s\"\n", uop->id_in_compute_unit, 
				simd->compute_unit->id, uop->wavefront->id, 
				uop->id_in_wavefront);
			list_index++;
			continue;
		}

		/* Sanity check the decode buffer */
		assert(list_count(simd->decode_buffer) <= 
				si_gpu_simd_decode_buffer_size);

		/* Stall if the decode buffer is full. */
		if (list_count(simd->decode_buffer) == 
			si_gpu_simd_decode_buffer_size)
		{
			si_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
				"stg=\"s\"\n", uop->id_in_compute_unit, 
				simd->compute_unit->id, uop->wavefront->id, 
				uop->id_in_wavefront);
			list_index++;
			continue;
		}

		uop->decode_ready = asTiming(si_gpu)->cycle + si_gpu_simd_decode_latency;
		list_remove(simd->issue_buffer, uop);
		list_enqueue(simd->decode_buffer, uop);

		if (si_spatial_report_active)
			si_alu_report_new_inst(simd->compute_unit);

		si_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
			"stg=\"simd-d\"\n", uop->id_in_compute_unit, 
			simd->compute_unit->id, uop->wavefront->id, 
			uop->id_in_wavefront);
	}
}
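Code examples #2 through #4 are the per-cycle stage handlers of the SIMD pipeline model: si_simd_complete drains the exec buffer, si_simd_execute moves uops from the decode buffer to the exec buffer, and si_simd_decode moves them from the issue buffer to the decode buffer. In a stage-based timing model such handlers are normally invoked once per cycle in reverse pipeline order, so a uop advances through at most one buffer per cycle. The wrapper below is a hypothetical sketch of such a driver; the actual per-cycle entry point is not shown in these excerpts and may differ.

/* Hypothetical per-cycle driver for the SIMD unit (assumption, not the
 * verbatim Multi2Sim code). Stages run in reverse pipeline order. */
void si_simd_run(struct si_simd_t *simd)
{
	si_simd_complete(simd);	/* exec_buffer -> retire (example #2) */
	si_simd_execute(simd);	/* decode_buffer -> exec_buffer (example #3) */
	si_simd_decode(simd);	/* issue_buffer -> decode_buffer (example #4) */
}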
Code example #5
void si_scalar_unit_writeback(struct si_scalar_unit_t *scalar_unit)
{
	struct si_uop_t *uop = NULL;
	struct si_wavefront_t *wavefront;
	struct si_work_group_t *work_group;
	struct si_ndrange_t *ndrange;
	int i;
	int list_count;
	int wavefront_id;

	/* Process completed memory instructions */
	list_count = linked_list_count(scalar_unit->mem_out_buffer);
	linked_list_head(scalar_unit->mem_out_buffer);
	for (i = 0; i < list_count; i++)
	{
		uop = linked_list_get(scalar_unit->mem_out_buffer);
		assert(uop);

		if (!uop->global_mem_witness)
		{
			/* Access complete, remove the uop from the queue */
			linked_list_remove(scalar_unit->mem_out_buffer);

			si_trace("si.inst id=%lld cu=%d stg=\"su-w\"\n", uop->id_in_compute_unit, 
				scalar_unit->compute_unit->id);

			/* Make the wavefront active again */
			wavefront = uop->wavefront;
			wavefront->ready = 1;

			/* Free uop */
			if (si_tracing())
				si_gpu_uop_trash_add(uop);
			else
				si_uop_free(uop);
		}
		else
		{
			linked_list_next(scalar_unit->mem_out_buffer);
		}
	}

	/* Process completed ALU instructions */
	list_count = linked_list_count(scalar_unit->alu_out_buffer);
	linked_list_head(scalar_unit->alu_out_buffer);
	for (i = 0; i < list_count; i++)
	{
		uop = linked_list_get(scalar_unit->alu_out_buffer);
		assert(uop);

		if (uop->execute_ready <= si_gpu->cycle)
		{
			/* Access complete, remove the uop from the queue */
			linked_list_remove(scalar_unit->alu_out_buffer);

			si_trace("si.inst id=%lld cu=%d stg=\"su-w\"\n", uop->id_in_compute_unit, 
				scalar_unit->compute_unit->id);

			/* Make the wavefront active again */
			wavefront = uop->wavefront;
			work_group = wavefront->work_group;
			ndrange = work_group->ndrange;
			if (wavefront->finished)
			{
				work_group->compute_unit_finished_count++;
			}
			else if (wavefront->barrier)
			{
				if (wavefront->barrier_cleared) 
				{
					/* All wavefronts have hit barrier */

					wavefront->barrier_cleared = 0;

					SI_FOREACH_WAVEFRONT_IN_WORK_GROUP(work_group, 
						wavefront_id)
					{
						wavefront = ndrange->wavefronts[wavefront_id];
						wavefront->barrier = 0;
						wavefront->ready = 1;
					}
	
				}
				else 
				{
					/* Wavefront is waiting at barrier */
				}
			}
			else
			{
				/* Wavefront can resume execution (reconstruction: the
				 * original excerpt is truncated at this point; the rest
				 * is assumed to mirror the memory path above) */
				wavefront->ready = 1;
			}

			/* Free uop */
			if (si_tracing())
				si_gpu_uop_trash_add(uop);
			else
				si_uop_free(uop);
		}
		else
		{
			linked_list_next(scalar_unit->alu_out_buffer);
		}
	}
}
Code example #6
File: gpu.c Project: ajdupree/cs316-m2s
/* Run one iteration of the Southern Islands GPU timing simulation loop. */
int si_gpu_run(void)
{
    struct si_ndrange_t *ndrange;

    struct si_compute_unit_t *compute_unit;
    struct si_compute_unit_t *compute_unit_next;

    /* For efficiency when no Southern Islands emulation is selected, exit here
     * if the list of existing ND-Ranges is empty. */
    if (!si_emu->ndrange_list_count)
        return 0;

    /* Start one ND-Range in state 'pending' */
    while ((ndrange = si_emu->pending_ndrange_list_head))
    {
        /* Running more than one ND-Range is currently not supported */
        if (si_gpu->ndrange)
            fatal("%s: Southern Islands GPU timing simulation not supported "
                  "for multiple ND-Ranges", __FUNCTION__);

        /* Set ND-Range status to 'running' */
        si_ndrange_clear_status(ndrange, si_ndrange_pending);
        si_ndrange_set_status(ndrange, si_ndrange_running);

        /* Trace */
        si_trace("si.new_ndrange id=%d wg_first=%d wg_count=%d\n", ndrange->id,
                 ndrange->work_group_id_first, ndrange->work_group_count);

        /* Map ND-Range to GPU */
        si_gpu_map_ndrange(ndrange);
        si_calc_plot();
    }

    /* Mapped ND-Range */
    ndrange = si_gpu->ndrange;
    assert(ndrange);

    /* Allocate work-groups to compute units */
    while (si_gpu->compute_unit_ready_list_head && ndrange->pending_list_head)
        si_compute_unit_map_work_group(si_gpu->compute_unit_ready_list_head,
                                       ndrange->pending_list_head);

    /* One more cycle */
    si_gpu->cycle++;

    /* Stop if maximum number of GPU cycles exceeded */
    if (si_emu_max_cycles && si_gpu->cycle >= si_emu_max_cycles)
        esim_finish = esim_finish_si_max_cycles;

    /* Stop if maximum number of GPU instructions exceeded */
    if (si_emu_max_inst && si_emu->inst_count >= si_emu_max_inst)
        esim_finish = esim_finish_si_max_inst;

    /* Stop if any reason met */
    if (esim_finish)
        return 1;

    /* Free instructions in trash */
    si_gpu_uop_trash_empty();

    /* Run one loop iteration on each busy compute unit */
    for (compute_unit = si_gpu->compute_unit_busy_list_head; compute_unit;
            compute_unit = compute_unit_next)
    {
        /* Store next busy compute unit, since this can change
         * during the compute unit simulation loop iteration. */
        compute_unit_next = compute_unit->compute_unit_busy_list_next;

        /* Run one cycle */
        si_compute_unit_run(compute_unit);
    }

    /* If ND-Range finished execution in all compute units, free it. */
    if (!si_gpu->compute_unit_busy_list_count)
    {
        /* Dump ND-Range report */
        si_ndrange_dump(ndrange, si_emu_report_file);

        /* Stop if maximum number of kernels reached */
        if (si_emu_max_kernels && si_emu->ndrange_count >= si_emu_max_kernels)
            esim_finish = esim_finish_si_max_kernels;

        /* Finalize and free ND-Range */
        assert(si_ndrange_get_status(ndrange, si_ndrange_finished));
        si_gpu_uop_trash_empty();
        si_gpu_unmap_ndrange();
        si_ndrange_free(ndrange);
    }

    /* Return true */
    return 1;
}
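si_gpu_run returns 0 when no ND-Range exists and there is no timing work pending, and 1 after running a cycle, so the caller can keep invoking it until the GPU model is idle or a finish condition is flagged. A hypothetical driver sketch follows (the actual Multi2Sim main loop also advances the event-driven engine and the other architecture models):

    /* Hypothetical outer loop (assumption; not the real m2s main loop):
     * run GPU cycles until the model is idle or a finish reason is set. */
    while (si_gpu_run() && !esim_finish)
        ;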