Пример #1
0
void frm_grid_run(struct frm_grid_t *grid)
{
	struct frm_threadblock_t *tb;
	struct frm_threadblock_t *tb_next;
	struct frm_warp_t *wp;
	struct frm_warp_t *wp_next;
	unsigned long long int cycle = 0;

	/* Promote every pending thread block to the running state.  The
	 * clear/set status calls move the block between the grid's lists,
	 * which is what advances this loop towards termination. */
	while ((tb = grid->pending_list_head))
	{
		frm_threadblock_clear_status(tb, frm_threadblock_pending);
		frm_threadblock_set_status(tb, frm_threadblock_running);
	}

	/* The grid itself is now in state 'running' */
	frm_grid_clear_status(grid, frm_grid_pending);
	frm_grid_set_status(grid, frm_grid_running);

	/* Main emulation loop: keep going while any thread block runs */
	while (grid->running_list_head)
	{
		/* Raise a stop condition if the cycle limit was reached */
		if (frm_emu_max_cycles && cycle >= frm_emu_max_cycles)
			esim_finish = esim_finish_frm_max_cycles;

		/* Raise a stop condition if the instruction limit was reached */
		if (frm_emu_max_inst && frm_emu->inst_count >= frm_emu_max_inst)
			esim_finish = esim_finish_frm_max_inst;

		/* Leave the loop as soon as any stop condition is active */
		if (esim_finish)
			break;

		/* Advance to the next cycle */
		cycle++;

		/* Execute one instruction per running warp of every running
		 * thread block.  The 'next' pointers are captured before each
		 * execution because running an instruction may unlink the
		 * current element from its running list. */
		for (tb = grid->running_list_head; tb; tb = tb_next)
		{
			tb_next = tb->running_list_next;

			for (wp = tb->running_list_head; wp; wp = wp_next)
			{
				wp_next = wp->running_list_next;
				frm_warp_execute(wp);
			}
		}
	}

	/* Report statistics */
	frm_grid_dump(grid, stdout);

	/* Stop if maximum number of functions reached */
	//if (frm_emu_max_functions && frm_emu->grid_count >= frm_emu_max_functions)
	//	x86_emu_finish = x86_emu_finish_max_gpu_functions;
}
Пример #2
0
/* Run one step of Fermi emulation.  Returns FALSE when no grid needs
 * running (emulation may stop), TRUE to continue emulation. */
int FrmEmuRun(Emu *self)
{
    FrmEmu *emu = asFrmEmu(self);

    struct frm_grid_t *grid;
    struct frm_thread_block_t *thread_block;
    struct frm_warp_t *warp;
    int warp_index;

    /* Stop emulation if no grid needs running */
    if (!list_count(emu->grids))
        return FALSE;

    /* Remove grid and its thread blocks from pending list, and add them to
     * running list */
    while ((grid = list_head(emu->pending_grids)))
    {
        while ((thread_block = list_head(grid->pending_thread_blocks)))
        {
            list_remove(grid->pending_thread_blocks, thread_block);
            list_add(grid->running_thread_blocks, thread_block);
        }

        list_remove(emu->pending_grids, grid);
        list_add(emu->running_grids, grid);
    }

    /* Run instructions until every running grid drains */
    while ((grid = list_head(emu->running_grids)))
    {
        while ((thread_block = list_head(grid->running_thread_blocks)))
        {
            /* BUG FIX: the previous head-based loop
             *     while ((warp = list_head(...))) { if (finished||at_barrier) continue; ... }
             * spun forever whenever the head warp was finished or waiting
             * at a barrier: 'continue' re-read the same head and the guard
             * never removed it from the list.  Iterate by index instead so
             * non-runnable warps are skipped; the enclosing while re-scans
             * the thread block until frm_warp_execute() eventually moves
             * it out of the running list. */
            for (warp_index = 0;
                 warp_index < list_count(thread_block->running_warps);
                 warp_index++)
            {
                warp = list_get(thread_block->running_warps, warp_index);

                /* Skip warps that cannot make progress right now */
                if (warp->finished || warp->at_barrier)
                    continue;

                /* NOTE(review): frm_warp_execute() may unlink the warp
                 * from running_warps; a shifted element can be skipped in
                 * this pass, but the outer while re-scans, so none is
                 * lost. */
                frm_warp_execute(warp);
            }
        }
    }

    /* Free finished grids.  At this point both the pending and running
     * lists must have drained completely. */
    assert(list_count(emu->pending_grids) == 0 &&
           list_count(emu->running_grids) == 0);
    while ((grid = list_head(emu->finished_grids)))
    {
        /* Dump grid report */
        frm_grid_dump(grid, frm_emu_report_file);

        /* Remove grid from finished list */
        list_remove(emu->finished_grids, grid);

        /* Free grid */
        frm_grid_free(grid);
    }

    /* Continue emulation */
    return TRUE;
}
Пример #3
0
/* Fetch stage of an SM front-end: emulate one instruction of the warp at
 * the head of warp-instruction-queue 'wiq_id', wrap it in a uop, and place
 * the uop in the corresponding fetch buffer.  Returns early (fetching
 * nothing) when the warp or its queue entry is not ready. */
void frm_sm_fetch(struct frm_sm_t *sm, int wiq_id)
{
	int j;
	int instructions_processed = 0;
	int thread_id;
	struct frm_warp_t *warp;
	struct frm_thread_t *thread;
	struct frm_uop_t *uop;
	struct frm_thread_uop_t *thread_uop;
	struct frm_warp_inst_queue_entry_t *warp_inst_queue_entry;
	/* BUG FIX: zero-initialize both buffers.  The frm_inst_dump() call
	 * that used to fill inst_str is commented out below, so the tracing
	 * path previously passed an uninitialized (possibly unterminated)
	 * buffer to str_single_spaces() -- undefined behavior. */
	char inst_str[1024] = { 0 };
	char inst_str_trimmed[1024] = { 0 };

	/* Warp at the head of the selected warp instruction queue.
	 * NOTE(review): assumes entries[0] is always valid -- confirm. */
	warp = sm->warp_inst_queues[wiq_id]->entries[0]->warp;

	/* No warp */
	if (!warp) 
		return;

	/* Sanity check warp */
	assert(warp->warp_inst_queue_entry);

	/* If instruction is ready the next cycle, just mark it ready now */
	if (warp->warp_inst_queue_entry->ready_next_cycle)
	{
		warp->warp_inst_queue_entry->ready = 1;
		warp->warp_inst_queue_entry->ready_next_cycle = 0;
		return;
	}

	/* Only fetch a fixed number of instructions per cycle.
	 * NOTE(review): vestigial check -- instructions_processed is still 0
	 * here (it is only incremented at the end of the function, which then
	 * returns), so this can only trigger if the fetch width is 0.  Kept
	 * for behavioral compatibility; looks left over from a loop-based
	 * version of this stage. */
	if (instructions_processed == frm_gpu_fe_fetch_width)
		return;

	/* WIQ entry not ready */
	if (!warp->warp_inst_queue_entry->ready)
		return;

	/* If the warp finishes, there still may be outstanding 
	 * memory operations, so if the entry is marked finished 
	 * the warp must also be finished, but not vice-versa */
	if (warp->warp_inst_queue_entry->warp_finished)
	{
		assert(warp->finished);
		return;
	}

	/* Warp is finished but other warps from thread block
	 * remain.  There may still be outstanding memory operations, 
	 * but no more instructions should be fetched. */
	if (warp->finished)
		return;

	/* Warp is ready but waiting on outstanding 
	 * memory instructions */
	if (warp->warp_inst_queue_entry->wait_for_mem)
	{
		/* Release the wait only once both outstanding-memory
		 * counters have drained to zero */
		if (!warp->warp_inst_queue_entry->lgkm_cnt &&
				!warp->warp_inst_queue_entry->vm_cnt)
		{
			warp->warp_inst_queue_entry->wait_for_mem =
				0;	
		}
		else
		{
			/* TODO Show a waiting state in visualization 
			 * tool */
			/* XXX uop is already freed */
			return;
		}
	}

	/* Warp is ready but waiting at barrier */
	if (warp->warp_inst_queue_entry->wait_for_barrier)
	{
		/* TODO Show a waiting state in visualization tool */
		/* XXX uop is already freed */
		return;
	}

	/* If fetch buffer full, nothing can be fetched this cycle */
	if (list_count(sm->fetch_buffers[wiq_id]) == 
			frm_gpu_fe_fetch_buffer_size)
		return;

	/* Emulate instruction: advances the warp's architectural state and
	 * fills in the per-warp fields copied into the uop below */
	frm_warp_execute(warp);

	/* Consume the queue entry's ready flag */
	warp_inst_queue_entry = warp->warp_inst_queue_entry;
	warp_inst_queue_entry->ready = 0;

	/* Create uop describing the just-emulated instruction */
	uop = frm_uop_create();
	uop->warp = warp;
	uop->thread_block = warp->thread_block;
	uop->sm = sm;
	uop->id_in_sm = sm->uop_id_counter++;
	uop->id_in_warp = warp->uop_id_counter++;
	uop->warp_inst_queue_id = wiq_id;
	uop->vector_mem_read = warp->vector_mem_read;
	uop->vector_mem_write = warp->vector_mem_write;
	uop->lds_read = warp->lds_read;
	uop->lds_write = warp->lds_write;
	uop->warp_inst_queue_entry = warp->warp_inst_queue_entry;
	uop->warp_last_inst = warp->finished;
	uop->mem_wait_inst = warp->mem_wait;
	uop->barrier_wait_inst = warp->barrier;
	uop->inst = warp->inst;
	uop->cycle_created = asTiming(frm_gpu)->cycle;
	assert(warp->thread_block && uop->thread_block);

	/* Debug */
	//frm_inst_dump(inst_str, sizeof inst_str, 
	//	warp->grid->inst_buffer, warp->pc / 8);

	/* Trace.  inst_str is currently always empty (disassembly above is
	 * disabled), so the asm field of the trace line is blank. */
	if (frm_tracing())
	{
		//frm_inst_dump(&warp->inst, warp->inst_size, 
		//	warp->pc, 
		//	warp->grid->inst_buffer + warp->pc,
		//	inst_str, sizeof inst_str);
		str_single_spaces(inst_str_trimmed, 
				sizeof inst_str_trimmed, 
				inst_str);
		frm_trace("si.new_inst id=%lld cu=%d ib=%d wg=%d "
				"wf=%d uop_id=%lld stg=\"f\" asm=\"%s\"\n", 
				uop->id_in_sm, sm->id, 
				uop->warp_inst_queue_id, uop->thread_block->id, 
				warp->id, uop->id_in_warp, 
				inst_str_trimmed);
	}

	/* Copy each thread's last memory accesses into its per-uop record */
	for (thread_id = uop->warp->threads[0]->id_in_warp; 
			thread_id < uop->warp->thread_count; 
			thread_id++)
	{
		thread = uop->warp->threads[thread_id];
		thread_uop = 
			&uop->thread_uop[thread->id_in_warp];

		/* Global memory */
		thread_uop->global_mem_access_addr = 
			thread->global_mem_access_addr;
		thread_uop->global_mem_access_size = 
			thread->global_mem_access_size;

		/* LDS */
		thread_uop->lds_access_count = 
			thread->lds_access_count;
		for (j = 0; j < thread->lds_access_count; j++)
		{
			thread_uop->lds_access_kind[j] = 
				thread->lds_access_type[j];
			thread_uop->lds_access_addr[j] = 
				thread->lds_access_addr[j];
			thread_uop->lds_access_size[j] = 
				thread->lds_access_size[j];
		}
	}

	/* Access instruction cache. Record the time when the 
	 * instruction will have been fetched, as per the latency 
	 * of the instruction memory. */
	uop->fetch_ready = asTiming(frm_gpu)->cycle + frm_gpu_fe_fetch_latency;

	/* Insert into fetch buffer */
	list_enqueue(sm->fetch_buffers[wiq_id], uop);

	/* Bookkeeping */
	instructions_processed++;
	sm->inst_count++;
}