Esempio n. 1
0
/* Emulate the system call requested by context 'ctx'. The call code is read
 * from MIPS register $v0 ('regs_R[2]'), dispatched through the
 * 'mips_sys_call_func' table, and the result is written back to $v0. */
void mips_sys_call(struct mips_ctx_t *ctx)
{
	struct mips_regs_t *regs = ctx->regs;

	int code;
	int err;

	/* System call code, normalized to a table index by subtracting the
	 * MIPS Linux syscall base number. */
	code = regs->regs_R[2] - __NR_Linux;
	if (code < 1 || code >= mips_sys_code_count)
		fatal("%s: invalid system call code (%d)", __FUNCTION__, code);

	/* Statistics */
	mips_sys_call_freq[code]++;

	/* Debug */
	mips_sys_debug("'%s' (code %d, inst %lld, pid %d)\n",
		mips_sys_call_name[code], code, asEmu(mips_emu)->instructions, ctx->pid);
	mips_isa_call_debug("system call '%s' (code %d, inst %lld, pid %d)\n",
		mips_sys_call_name[code], code, asEmu(mips_emu)->instructions, ctx->pid);

	/* Perform system call */
	err = mips_sys_call_func[code](ctx);

	/* Set return value in $v0, except for 'sigreturn' system call. Also, if the
	 * context got suspended, the wake up routine will set the return value. */
	if (code != mips_sys_code_sigreturn && !mips_ctx_get_status(ctx, mips_ctx_suspended))
		regs->regs_R[2] = err;

	/* Debug. The errno format previously emitted a stray ')' that duplicated
	 * the one already closed in the 'ret=' format string. */
	mips_sys_debug("  ret=(%d, 0x%x)", err, err);
	if (err < 0 && err >= -SIM_ERRNO_MAX)
		mips_sys_debug(", errno=%s", str_map_value(&mips_sys_error_code_map, -err));
	mips_sys_debug("\n");
}
Esempio n. 2
0
/* Constructor for the MIPS emulator object. Builds the parent class,
 * installs virtual functions, and initializes instance state. */
void MIPSEmuCreate(MIPSEmu *self)
{
	/* Construct parent class */
	EmuCreate(asEmu(self), "MIPS");

	/* Install virtual functions */
	asObject(self)->Dump = MIPSEmuDump;
	asEmu(self)->DumpSummary = MIPSEmuDumpSummary;
	asEmu(self)->Run = MIPSEmuRun;

	/* Instance state: PIDs handed to new contexts start at 100; the mutex
	 * serializes access to the process-events machinery. */
	self->current_pid = 100;
	pthread_mutex_init(&self->process_events_mutex, NULL);
}
Esempio n. 3
0
/* Run fast-forward simulation */
void X86CpuFastForward(X86Cpu *self) {
  X86Emu *emu = self->emu;

  /* Fast-forward simulation. Run 'x86_cpu_fast_forward' iterations of the x86
   * emulation loop until any simulation end reason is detected. */
  while (asEmu(emu)->instructions < x86_cpu_fast_forward_count && !esim_finish)
    X86EmuRun(asEmu(emu));

  /* Record number of instructions in fast-forward execution. */
  self->num_fast_forward_inst = asEmu(emu)->instructions;

  /* Output warning if simulation finished during fast-forward execution. */
  if (esim_finish)
    warning("x86 fast-forwarding finished simulation.\n%s",
            x86_cpu_err_fast_forward);
}
Esempio n. 4
0
/* Execute one instruction for every active thread in the warp, then update
 * warp bookkeeping. Moves the warp to the finished list when it completes. */
void KplWarpExecute(KplWarp *self)
{
	KplEmu *emu;
	KplGrid *grid;
	KplThreadBlock *thread_block;
	KplThread *thread;
	struct KplInstWrap *inst;

	KplInstBytes inst_bytes;
	KplInstOpcode inst_op;
	int thread_id;

	/* Get current arch, grid, and thread-block */
	thread_block = self->thread_block;
	grid = thread_block->grid;
	emu = grid->emu;

	/* Get instruction: the 64-bit buffer word at the current PC is split
	 * into two 32-bit halves (upper half into as_uint[0], lower half into
	 * as_uint[1]). */
	inst_bytes.as_uint[0] = self->inst_buffer[self->pc / self->inst_size] >> 32;
	inst_bytes.as_uint[1] = self->inst_buffer[self->pc / self->inst_size];
	kpl_isa_debug("%s:%d: warp[%d] executes instruction [0x%x] 0x%016llx\n",
			__FILE__, __LINE__, self->id, self->pc, inst_bytes.as_dword);

	/* Decode instruction */
	inst = self->inst;
	KplInstWrapDecode(inst, self->pc, &inst_bytes);

	/* Execute instruction: dispatch the decoded opcode through the
	 * emulator's function table, once per thread in the warp. */
	inst_op = KplInstWrapGetOpcode(inst);
	if (!inst_op)
		fatal("%s:%d: unrecognized instruction (%08x %08x)",
				__FILE__, __LINE__, inst_bytes.as_uint[0], inst_bytes.as_uint[1]);
	for (thread_id = 0; thread_id < self->thread_count; ++thread_id)
	{
		thread = self->threads[thread_id];
		emu->inst_func[inst_op](thread, inst);
	}

	/* Finish: when the warp completed during execution, migrate it from
	 * the running list to the finished list and skip stats/PC update. */
	if (self->finished)
	{
		assert(list_index_of(thread_block->running_warps, self) != -1);
		assert(list_index_of(thread_block->finished_warps, self) == -1);
		list_remove(thread_block->running_warps, self);
		list_add(thread_block->finished_warps, self);

		return;
	}

	/* Update PC */
	/* NOTE(review): PC update is intentionally commented out here -
	 * presumably handled elsewhere (e.g. by the control-flow emulation
	 * functions); confirm before re-enabling. */
/*	if (KplInstWrapGetCategory(inst) != KplInstCategoryCtrl)
		self->pc += self->inst_size;
	else
		self->pc = self->target_pc;
*/

	/* Stats */
	asEmu(emu)->instructions++;
	self->inst_count++;
}
Esempio n. 5
0
/* Run one iteration of the MIPS functional emulation loop: execute one
 * instruction in every running context and reap finished contexts.
 * Returns FALSE when there is nothing left to simulate, TRUE otherwise. */
int MIPSEmuRun(Emu *self)
{
	MIPSEmu *emu = asMIPSEmu(self);

	struct mips_ctx_t *ctx;

	/* Stop if there is no context running */
	if (emu->finished_list_count >= emu->context_list_count)
		return FALSE;

	/* Stop if maximum number of CPU instructions exceeded. Use 'self'
	 * (the emulator we were invoked on) rather than global 'mips_emu',
	 * consistent with the context-count check above. */
	if (mips_emu_max_inst && self->instructions >= mips_emu_max_inst)
		esim_finish = esim_finish_mips_max_inst;

	/* Stop if any previous reason met */
	if (esim_finish)
		return TRUE;

	/* Run an instruction from every running process */
	for (ctx = emu->running_list_head; ctx; ctx = ctx->running_list_next)
		mips_ctx_execute(ctx);

	/* Free finished contexts */
	while (emu->finished_list_head)
		mips_ctx_free(emu->finished_list_head);

	/* Still running */
	return TRUE;
}
Esempio n. 6
0
/* Constructor for the Southern Islands emulator object. */
void SIEmuCreate(SIEmu *self)
{
	/* Construct parent class */
	EmuCreate(asEmu(self), "SouthernIslands");

	/* Install virtual functions */
	asObject(self)->Dump = SIEmuDump;
	asEmu(self)->DumpSummary = SIEmuDumpSummary;
	asEmu(self)->Run = SIEmuRun;

	/* Video memory, in unsafe mode (accesses need no prior allocation) */
	self->video_mem = mem_create();
	self->video_mem->safe = 0;
	self->video_mem_top = 0;

	/* Work-group scheduling lists */
	self->waiting_work_groups = list_create();
	self->running_work_groups = list_create();

	/* Global memory aliases video memory by default */
	self->global_mem = self->video_mem;
}
Esempio n. 7
0
/* Dump the Evergreen GPU simulation summary: parent statistics plus the
 * emulated-instructions-per-cycle ratio. */
void EvgGpuDumpSummary(Timing *self, FILE *f)
{
	double ipc = 0.0;

	/* Call parent */
	TimingDumpSummary(asTiming(self), f);

	/* Instructions per cycle; left at zero when no cycle was simulated
	 * to avoid dividing by zero. */
	if (asTiming(evg_gpu)->cycle)
		ipc = (double) asEmu(evg_emu)->instructions / asTiming(evg_gpu)->cycle;
	fprintf(f, "IPC = %.4g\n", ipc);
}
Esempio n. 8
0
/* Constructor for the Fermi emulator object: builds the parent class,
 * creates grid lists and memories, and installs virtual functions. */
void FrmEmuCreate(FrmEmu *self)
{
    /* Parent */
    EmuCreate(asEmu(self), "Fermi");

    /* Grid scheduling lists */
    self->grids = list_create();
    self->pending_grids = list_create();
    self->running_grids = list_create();
    self->finished_grids = list_create();

    /* Global memory, in unsafe mode (accesses need no prior allocation).
     * Use an unsigned shift for the 2GB size: '1 << 31' overflows a
     * 32-bit signed int, which is undefined behavior in C. */
    self->global_mem = mem_create();
    self->global_mem->safe = 0;
    self->global_mem_top = 0;
    self->total_global_mem_size = 1u << 31; /* 2GB */
    self->free_global_mem_size = 1u << 31; /* 2GB */

    /* Constant memory, also unsafe */
    self->const_mem = mem_create();
    self->const_mem->safe = 0;

    /* Virtual functions */
    asObject(self)->Dump = FrmEmuDump;
    asEmu(self)->DumpSummary = FrmEmuDumpSummary;
    asEmu(self)->Run = FrmEmuRun;
}
Esempio n. 9
0
/* Fetch, decode, and execute one x86 instruction for context 'self',
 * honoring speculative mode with respect to memory protection. */
void X86ContextExecute(X86Context *self)
{
	X86Emu *emu = self->emu;

	struct x86_regs_t *regs = self->regs;
	struct mem_t *mem = self->mem;

	/* Local fallback buffer for the fetch when the 20 bytes at EIP do not
	 * lie in a single directly-addressable memory buffer. */
	unsigned char buffer[20];
	unsigned char *buffer_ptr;

	int spec_mode;

	/* Memory permissions should not be checked if the context is executing in
	 * speculative mode. This will prevent guest segmentation faults to occur. */
	spec_mode = X86ContextGetState(self, X86ContextSpecMode);
	mem->safe = spec_mode ? 0 : mem_safe_mode;

	/* Read instruction from memory. Memory should be accessed here in unsafe mode
	 * (i.e., allowing segmentation faults) if executing speculatively. */
	buffer_ptr = mem_get_buffer(mem, regs->eip, 20, mem_access_exec);
	if (!buffer_ptr)
	{
		/* Disable safe mode. If a part of the 20 read bytes does not belong to the
		 * actual instruction, and they lie on a page with no permissions, this would
		 * generate an undesired protection fault. */
		mem->safe = 0;
		buffer_ptr = buffer;
		mem_access(mem, regs->eip, 20, buffer_ptr, mem_access_exec);
	}
	/* Restore the global safe-mode setting before executing */
	mem->safe = mem_safe_mode;

	/* Disassemble. An invalid opcode is fatal only in non-speculative
	 * execution; speculative paths may legitimately decode garbage. */
	X86InstDecode(&self->inst, regs->eip, buffer_ptr);
	if (self->inst.opcode == X86InstOpcodeInvalid && !spec_mode)
		fatal("0x%x: not supported x86 instruction (%02x %02x %02x %02x...)",
			regs->eip, buffer_ptr[0], buffer_ptr[1], buffer_ptr[2], buffer_ptr[3]);


	/* Stop if instruction matches last instruction bytes */
	if (x86_emu_last_inst_size &&
		x86_emu_last_inst_size == self->inst.size &&
		!memcmp(x86_emu_last_inst_bytes, buffer_ptr, x86_emu_last_inst_size))
		esim_finish = esim_finish_x86_last_inst;

	/* Execute instruction */
	X86ContextExecuteInst(self);
	
	/* Statistics */
	asEmu(emu)->instructions++;
}
Esempio n. 10
0
/* Run one cycle of x86 timing simulation. Returns FALSE when there is
 * nothing left to simulate, TRUE while simulation should continue. */
int X86CpuRun(Timing *self)
{
	X86Cpu *cpu = asX86Cpu(self);
	X86Emu *emu = cpu->emu;

	/* Nothing to do once every context has finished */
	if (emu->finished_list_count >= emu->context_list_count)
		return FALSE;

	/* Functional fast-forward, if requested and not yet completed */
	if (x86_cpu_fast_forward_count &&
			asEmu(emu)->instructions < x86_cpu_fast_forward_count)
		X86CpuFastForward(cpu);

	/* Flag simulation end when the committed-instruction budget is spent */
	if (x86_emu_max_inst &&
			cpu->num_committed_inst >= x86_emu_max_inst - x86_cpu_fast_forward_count)
		esim_finish = esim_finish_x86_max_inst;

	/* Flag simulation end when the cycle budget is spent */
	if (x86_emu_max_cycles && self->cycle >= x86_emu_max_cycles)
		esim_finish = esim_finish_x86_max_cycles;

	/* Any end reason raised above or elsewhere stops the cycle loop */
	if (esim_finish)
		return TRUE;

	/* Advance simulated time by one cycle */
	self->cycle++;

	/* Empty uop trace list. This dumps the last trace line for instructions
	 * that were freed in the previous simulation cycle. */
	X86CpuEmptyTraceList(cpu);

	/* Run all processor pipeline stages for this cycle */
	X86CpuRunStages(cpu);

	/* Service events generated by host threads */
	X86EmuProcessEvents(emu);

	/* Still simulating */
	return TRUE;
}
Esempio n. 11
0
/* Destructor for an x86 context: finishes the context if still alive,
 * detaches it from all emulator lists, and releases its resources. */
void X86ContextDestroy(X86Context *self)
{
	X86Emu *emu = self->emu;

	/* If context is not finished/zombie, finish it first.
	 * This removes all references to current freed context. */
	if (!X86ContextGetState(self, X86ContextFinished | X86ContextZombie))
		X86ContextFinish(self, 0);
	
	/* Remove context from finished contexts list. This should
	 * be the only list the context is in right now. */
	assert(!DOUBLE_LINKED_LIST_MEMBER(emu, running, self));
	assert(!DOUBLE_LINKED_LIST_MEMBER(emu, suspended, self));
	assert(!DOUBLE_LINKED_LIST_MEMBER(emu, zombie, self));
	assert(DOUBLE_LINKED_LIST_MEMBER(emu, finished, self));
	DOUBLE_LINKED_LIST_REMOVE(emu, finished, self);
		
	/* Free private structures (owned exclusively by this context) */
	x86_regs_free(self->regs);
	x86_regs_free(self->backup_regs);
	x86_signal_mask_table_free(self->signal_mask_table);
	spec_mem_free(self->spec_mem);
	bit_map_free(self->affinity);

	/* Unlink shared structures (reference-counted; possibly shared with
	 * other contexts, e.g. after clone) */
	x86_loader_unlink(self->loader);
	x86_signal_handler_table_unlink(self->signal_handler_table);
	x86_file_desc_table_unlink(self->file_desc_table);
	mem_unlink(self->mem);

	/* Remove context from contexts list and free */
	DOUBLE_LINKED_LIST_REMOVE(emu, context, self);
	X86ContextDebug("inst %lld: context %d freed\n",
			asEmu(emu)->instructions, self->pid);

	/* Static instruction */
	delete_static(&self->inst);
}
Esempio n. 12
0
/* FIXME - merge with ctx_execute */
void mips_isa_execute_inst(struct mips_ctx_t *ctx) {
  //	struct mips_regs_t *regs = ctx->regs;
  ctx->next_ip = ctx->n_next_ip;
  ctx->n_next_ip += 4;

  /* Debug */
  if (debug_status(mips_isa_inst_debug_category)) {
    mips_isa_inst_debug("%d %8lld %x: ", ctx->pid,
                        asEmu(mips_emu)->instructions, ctx->regs->pc);
    mips_inst_debug_dump(&ctx->inst, debug_file(mips_isa_inst_debug_category));
  }

  /* Call instruction emulation function */
  //	regs->pc = regs->pc + ctx->inst.info->size;
  if (ctx->inst.info->opcode) mips_isa_inst_func[ctx->inst.info->opcode](ctx);
  /* Statistics */
  mips_inst_freq[ctx->inst.info->opcode]++;

  /* Debug */
  mips_isa_inst_debug("\n");
  //	if (debug_status(mips_isa_call_debug_category))
  //		mips_isa_debug_call(ctx);
}
Esempio n. 13
0
/* Set the state of context 'self' to 'state', moving it between the
 * emulator's running/suspended/zombie/finished lists accordingly, and
 * signal the scheduler when the effective state changed. */
static void X86ContextUpdateState(X86Context *self, X86ContextState state)
{
	X86Emu *emu = self->emu;

	X86ContextState status_diff;
	char state_str[MAX_STRING_SIZE];

	/* Remove contexts from the following lists:
	 *   running, suspended, zombie */
	if (DOUBLE_LINKED_LIST_MEMBER(emu, running, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, running, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, suspended, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, suspended, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, zombie, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, zombie, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, finished, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, finished, self);
	
	/* If the difference between the old and new state lies in other
	 * states other than 'x86_ctx_specmode', a reschedule is marked. */
	status_diff = self->state ^ state;
	if (status_diff & ~X86ContextSpecMode)
		emu->schedule_signal = 1;
	
	/* Update state. Finished/zombie are terminal: they mask out every
	 * other flag except Alloc/Mapped. */
	self->state = state;
	if (self->state & X86ContextFinished)
		self->state = X86ContextFinished
				| (state & X86ContextAlloc)
				| (state & X86ContextMapped);
	if (self->state & X86ContextZombie)
		self->state = X86ContextZombie
				| (state & X86ContextAlloc)
				| (state & X86ContextMapped);
	/* Running is a derived flag: set iff none of suspended, finished,
	 * zombie, or locked is present. */
	if (!(self->state & X86ContextSuspended) &&
		!(self->state & X86ContextFinished) &&
		!(self->state & X86ContextZombie) &&
		!(self->state & X86ContextLocked))
		self->state |= X86ContextRunning;
	else
		self->state &= ~X86ContextRunning;
	
	/* Insert context into the corresponding lists. */
	if (self->state & X86ContextRunning)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, running, self);
	if (self->state & X86ContextZombie)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, zombie, self);
	if (self->state & X86ContextFinished)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, finished, self);
	if (self->state & X86ContextSuspended)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, suspended, self);
	
	/* Dump new state (ignore 'x86_ctx_specmode' state, it's too frequent) */
	if (debug_status(x86_context_debug_category) && (status_diff & ~X86ContextSpecMode))
	{
		str_map_flags(&x86_context_state_map, self->state, state_str, sizeof state_str);
		X86ContextDebug("inst %lld: ctx %d changed state to %s\n",
			asEmu(emu)->instructions, self->pid, state_str);
	}

	/* Start/stop x86 timer depending on whether there are any contexts
	 * currently running. */
	if (emu->running_list_count)
		m2s_timer_start(asEmu(emu)->timer);
	else
		m2s_timer_stop(asEmu(emu)->timer);
}
Esempio n. 14
0
/* Write the full x86 CPU simulation report to 'f': global statistics,
 * then a section per core, then a section per hardware thread. The
 * output is an INI-style text format consumed by external tooling, so
 * the exact field names and order must be preserved. */
void X86CpuDumpReport(X86Cpu *self, FILE *f) {
  X86Emu *emu = self->emu;
  X86Core *core;
  X86Thread *thread;

  long long now;

  int i;
  int j;

  /* Get CPU timer value (microseconds, per the Time conversion below) */
  now = m2s_timer_get_value(asEmu(emu)->timer);

  /* Dump CPU configuration */
  fprintf(f, ";\n; CPU Configuration\n;\n\n");
  X86DumpCpuConfig(f);

  /* Report for the complete processor */
  fprintf(f, ";\n; Simulation Statistics\n;\n\n");
  fprintf(f, "; Global statistics\n");
  fprintf(f, "[ Global ]\n\n");
  fprintf(f, "Cycles = %lld\n", asTiming(self)->cycle);
  fprintf(f, "Time = %.2f\n", (double)now / 1000000);
  fprintf(f, "CyclesPerSecond = %.0f\n",
          now ? (double)asTiming(self)->cycle / now * 1000000 : 0.0);
  fprintf(f, "MemoryUsed = %lu\n", (long)mem_mapped_space);
  fprintf(f, "MemoryUsedMax = %lu\n", (long)mem_max_mapped_space);
  fprintf(f, "\n");

  /* Dispatch stage */
  fprintf(f, "; Dispatch stage\n");
  X86CpuDumpUopReport(self, f, self->num_dispatched_uinst_array, "Dispatch",
                      x86_cpu_dispatch_width);

  /* Issue stage */
  fprintf(f, "; Issue stage\n");
  X86CpuDumpUopReport(self, f, self->num_issued_uinst_array, "Issue",
                      x86_cpu_issue_width);

  /* Commit stage */
  fprintf(f, "; Commit stage\n");
  X86CpuDumpUopReport(self, f, self->num_committed_uinst_array, "Commit",
                      x86_cpu_commit_width);

  /* Committed branches */
  fprintf(f, "; Committed branches\n");
  fprintf(f, ";    Branches - Number of committed control uops\n");
  fprintf(
      f, ";    Squashed - Number of mispredicted uops squashed from the ROB\n");
  fprintf(
      f,
      ";    Mispred - Number of mispredicted branches in the correct path\n");
  fprintf(f, ";    PredAcc - Prediction accuracy\n");
  fprintf(f, "Commit.Branches = %lld\n", self->num_branch_uinst);
  fprintf(f, "Commit.Squashed = %lld\n", self->num_squashed_uinst);
  fprintf(f, "Commit.Mispred = %lld\n", self->num_mispred_branch_uinst);
  fprintf(
      f, "Commit.PredAcc = %.4g\n",
      self->num_branch_uinst
          ? (double)(self->num_branch_uinst - self->num_mispred_branch_uinst) /
                self->num_branch_uinst
          : 0.0);
  fprintf(f, "\n");

  /* Report for each core */
  for (i = 0; i < x86_cpu_num_cores; i++) {
    /* Core */
    core = self->cores[i];
    fprintf(f, "\n; Statistics for core %d\n", core->id);
    fprintf(f, "[ c%d ]\n\n", core->id);

    /* Functional units */
    X86CoreDumpFunctionalUnitsReport(core, f);

    /* Dispatch slots (only meaningful for timeslice dispatch) */
    if (x86_cpu_dispatch_kind == x86_cpu_dispatch_kind_timeslice) {
      fprintf(f, "; Dispatch slots usage (sum = cycles * dispatch width)\n");
      fprintf(f, ";    used - dispatch slot was used by a non-spec uop\n");
      fprintf(f, ";    spec - used by a mispeculated uop\n");
      fprintf(f, ";    ctx - no context allocated to thread\n");
      fprintf(f, ";    uopq,rob,iq,lsq,rename - no space in structure\n");
      DUMP_DISPATCH_STAT(used);
      DUMP_DISPATCH_STAT(spec);
      DUMP_DISPATCH_STAT(uop_queue);
      DUMP_DISPATCH_STAT(rob);
      DUMP_DISPATCH_STAT(iq);
      DUMP_DISPATCH_STAT(lsq);
      DUMP_DISPATCH_STAT(rename);
      DUMP_DISPATCH_STAT(ctx);
      fprintf(f, "\n");
    }

    /* Dispatch stage */
    fprintf(f, "; Dispatch stage\n");
    X86CpuDumpUopReport(self, f, core->num_dispatched_uinst_array, "Dispatch",
                        x86_cpu_dispatch_width);

    /* Issue stage */
    fprintf(f, "; Issue stage\n");
    X86CpuDumpUopReport(self, f, core->num_issued_uinst_array, "Issue",
                        x86_cpu_issue_width);

    /* Commit stage */
    fprintf(f, "; Commit stage\n");
    X86CpuDumpUopReport(self, f, core->num_committed_uinst_array, "Commit",
                        x86_cpu_commit_width);

    /* Committed branches */
    fprintf(f, "; Committed branches\n");
    fprintf(f, "Commit.Branches = %lld\n", core->num_branch_uinst);
    fprintf(f, "Commit.Squashed = %lld\n", core->num_squashed_uinst);
    fprintf(f, "Commit.Mispred = %lld\n", core->num_mispred_branch_uinst);
    fprintf(f, "Commit.PredAcc = %.4g\n",
            core->num_branch_uinst
                ? (double)(core->num_branch_uinst -
                           core->num_mispred_branch_uinst) /
                      core->num_branch_uinst
                : 0.0);
    fprintf(f, "\n");

    /* Occupancy stats, dumped here only for structures shared by the
     * whole core; private structures are dumped per thread below. */
    fprintf(f, "; Structure statistics (reorder buffer, instruction queue,\n");
    fprintf(f,
            "; load-store queue, and integer/floating-point register file)\n");
    fprintf(f, ";    Size - Available size\n");
    fprintf(f, ";    Occupancy - Average number of occupied entries\n");
    fprintf(f, ";    Full - Number of cycles when the structure was full\n");
    fprintf(f, ";    Reads, Writes - Accesses to the structure\n");
    if (x86_rob_kind == x86_rob_kind_shared) DUMP_CORE_STRUCT_STATS(ROB, rob);
    if (x86_iq_kind == x86_iq_kind_shared) {
      DUMP_CORE_STRUCT_STATS(IQ, iq);
      fprintf(f, "IQ.WakeupAccesses = %lld\n", core->iq_wakeup_accesses);
    }
    if (x86_lsq_kind == x86_lsq_kind_shared) DUMP_CORE_STRUCT_STATS(LSQ, lsq);
    if (x86_reg_file_kind == x86_reg_file_kind_shared) {
      DUMP_CORE_STRUCT_STATS(RF_Int, reg_file_int);
      DUMP_CORE_STRUCT_STATS(RF_Fp, reg_file_fp);
    }
    fprintf(f, "\n");

    /* Report for each thread */
    for (j = 0; j < x86_cpu_num_threads; j++) {
      thread = core->threads[j];
      fprintf(f, "\n; Statistics for core %d - thread %d\n", core->id,
              thread->id_in_core);
      fprintf(f, "[ %s ]\n\n", thread->name);

      /* Dispatch stage */
      fprintf(f, "; Dispatch stage\n");
      X86CpuDumpUopReport(self, f, thread->num_dispatched_uinst_array,
                          "Dispatch", x86_cpu_dispatch_width);

      /* Issue stage */
      fprintf(f, "; Issue stage\n");
      X86CpuDumpUopReport(self, f, thread->num_issued_uinst_array, "Issue",
                          x86_cpu_issue_width);

      /* Commit stage */
      fprintf(f, "; Commit stage\n");
      X86CpuDumpUopReport(self, f, thread->num_committed_uinst_array, "Commit",
                          x86_cpu_commit_width);

      /* Committed branches */
      fprintf(f, "; Committed branches\n");
      fprintf(f, "Commit.Branches = %lld\n", thread->num_branch_uinst);
      fprintf(f, "Commit.Squashed = %lld\n", thread->num_squashed_uinst);
      fprintf(f, "Commit.Mispred = %lld\n", thread->num_mispred_branch_uinst);
      fprintf(f, "Commit.PredAcc = %.4g\n",
              thread->num_branch_uinst
                  ? (double)(thread->num_branch_uinst -
                             thread->num_mispred_branch_uinst) /
                        thread->num_branch_uinst
                  : 0.0);
      fprintf(f, "\n");

      /* Occupancy stats for thread-private structures */
      fprintf(f,
              "; Structure statistics (reorder buffer, instruction queue, "
              "load-store queue,\n");
      fprintf(f,
              "; integer/floating-point register file, and renaming table)\n");
      if (x86_rob_kind == x86_rob_kind_private)
        DUMP_THREAD_STRUCT_STATS(ROB, rob);
      if (x86_iq_kind == x86_iq_kind_private) {
        DUMP_THREAD_STRUCT_STATS(IQ, iq);
        fprintf(f, "IQ.WakeupAccesses = %lld\n", thread->iq_wakeup_accesses);
      }
      if (x86_lsq_kind == x86_lsq_kind_private)
        DUMP_THREAD_STRUCT_STATS(LSQ, lsq);
      if (x86_reg_file_kind == x86_reg_file_kind_private) {
        DUMP_THREAD_STRUCT_STATS(RF_Int, reg_file_int);
        DUMP_THREAD_STRUCT_STATS(RF_Fp, reg_file_fp);
      }
      fprintf(f, "RAT.IntReads = %lld\n", thread->rat_int_reads);
      fprintf(f, "RAT.IntWrites = %lld\n", thread->rat_int_writes);
      fprintf(f, "RAT.FpReads = %lld\n", thread->rat_fp_reads);
      fprintf(f, "RAT.FpWrites = %lld\n", thread->rat_fp_writes);
      fprintf(f, "BTB.Reads = %lld\n", thread->btb_reads);
      fprintf(f, "BTB.Writes = %lld\n", thread->btb_writes);
      fprintf(f, "\n");

      /* Trace cache stats */
      if (thread->trace_cache) X86ThreadDumpTraceCacheReport(thread, f);
    }
  }
}
Esempio n. 15
0
/* Run one cycle of Evergreen GPU timing simulation: launch pending
 * ND-Ranges, map work-groups to compute units, advance every busy
 * compute unit by one cycle, and reap the ND-Range once all compute
 * units are done. Returns FALSE when there is nothing to simulate,
 * TRUE while simulation should continue. */
int EvgGpuRun(Timing *self)
{
	EvgGpu *gpu = asEvgGpu(self);

	struct evg_ndrange_t *ndrange;

	struct evg_compute_unit_t *compute_unit;
	struct evg_compute_unit_t *compute_unit_next;

	/* For efficiency when no Evergreen emulation is selected, exit here
	 * if the list of existing ND-Ranges is empty. */
	if (!evg_emu->ndrange_list_count)
		return FALSE;

	/* Start one ND-Range in state 'pending' */
	while ((ndrange = evg_emu->pending_ndrange_list_head))
	{
		/* Currently not supported for more than 1 ND-Range */
		if (gpu->ndrange)
			fatal("%s: Evergreen GPU timing simulation not supported for multiple ND-Ranges",
				__FUNCTION__);

		/* Set ND-Range status to 'running' */
		evg_ndrange_clear_status(ndrange, evg_ndrange_pending);
		evg_ndrange_set_status(ndrange, evg_ndrange_running);

		/* Trace */
		evg_trace("evg.new_ndrange "
			"id=%d "
			"wg_first=%d "
			"wg_count=%d\n",
			ndrange->id,
			ndrange->work_group_id_first,
			ndrange->work_group_count);

		/* Map ND-Range to GPU */
		evg_gpu_map_ndrange(ndrange);
		evg_calc_plot();
	}

	/* Mapped ND-Range */
	ndrange = gpu->ndrange;
	assert(ndrange);

	/* Allocate work-groups to compute units while there are both ready
	 * compute units and pending work-groups */
	while (gpu->ready_list_head && ndrange->pending_list_head)
		evg_compute_unit_map_work_group(gpu->ready_list_head,
			ndrange->pending_list_head);

	/* One more cycle */
	asTiming(evg_gpu)->cycle++;

	/* Stop if maximum number of GPU cycles exceeded */
	if (evg_emu_max_cycles && asTiming(evg_gpu)->cycle >= evg_emu_max_cycles)
		esim_finish = esim_finish_evg_max_cycles;

	/* Stop if maximum number of GPU instructions exceeded */
	if (evg_emu_max_inst && asEmu(evg_emu)->instructions >= evg_emu_max_inst)
		esim_finish = esim_finish_evg_max_inst;
	
	/* Stop if there was a simulation stall (no progress for 1M cycles) */
	if (asTiming(evg_gpu)->cycle - gpu->last_complete_cycle > 1000000)
	{
		warning("Evergreen GPU simulation stalled.\n%s",
			evg_err_stall);
		esim_finish = esim_finish_stall;
	}

	/* Stop if any reason met */
	if (esim_finish)
		return TRUE;

	/* Free instructions in trash */
	evg_gpu_uop_trash_empty();

	/* Run one loop iteration on each busy compute unit */
	for (compute_unit = gpu->busy_list_head; compute_unit;
		compute_unit = compute_unit_next)
	{
		/* Store next busy compute unit, since this can change
		 * during the compute unit simulation loop iteration. */
		compute_unit_next = compute_unit->busy_list_next;

		/* Run one cycle */
		evg_compute_unit_run(compute_unit);
	}

	/* GPU-REL: insert stack faults */
	evg_faults_insert();

	/* If ND-Range finished execution in all compute units, free it. */
	if (!gpu->busy_list_count)
	{
		/* Dump ND-Range report */
		evg_ndrange_dump(ndrange, evg_emu_report_file);

		/* Stop if maximum number of kernels reached */
		if (evg_emu_max_kernels && evg_emu->ndrange_count >= evg_emu_max_kernels)
			esim_finish = esim_finish_evg_max_kernels;

		/* Finalize and free ND-Range */
		assert(evg_ndrange_get_status(ndrange, evg_ndrange_finished));
		evg_gpu_uop_trash_empty();
		evg_gpu_unmap_ndrange();
		evg_ndrange_free(ndrange);
	}

	/* Still simulating */
	return TRUE;
}
Esempio n. 16
0
/* Write the Evergreen GPU simulation report to the file named in
 * 'evg_gpu_report_file_name': device-level statistics followed by one
 * section per compute unit. Silently returns if the file cannot be
 * opened. The INI-style field names/order are part of the tool's output
 * format and must stay stable. */
void evg_gpu_dump_report(void)
{
	struct evg_compute_unit_t *compute_unit;
	struct mod_t *local_mod;
	int compute_unit_id;

	FILE *f;

	double inst_per_cycle;
	double cf_inst_per_cycle;
	double alu_inst_per_cycle;
	double tex_inst_per_cycle;

	long long coalesced_reads;
	long long coalesced_writes;

	char vliw_occupancy[MAX_STRING_SIZE];

	/* Open file */
	f = file_open_for_write(evg_gpu_report_file_name);
	if (!f)
		return;

	/* Dump GPU configuration */
	fprintf(f, ";\n; GPU Configuration\n;\n\n");
	evg_config_dump(f);

	/* Report for device; all per-cycle ratios guard against a zero
	 * denominator by reporting 0.0. */
	fprintf(f, ";\n; Simulation Statistics\n;\n\n");
	inst_per_cycle = asTiming(evg_gpu)->cycle ? (double) asEmu(evg_emu)->instructions
			/ asTiming(evg_gpu)->cycle : 0.0;
	fprintf(f, "[ Device ]\n\n");
	fprintf(f, "NDRangeCount = %d\n", evg_emu->ndrange_count);
	fprintf(f, "Instructions = %lld\n", asEmu(evg_emu)->instructions);
	fprintf(f, "Cycles = %lld\n", asTiming(evg_gpu)->cycle);
	fprintf(f, "InstructionsPerCycle = %.4g\n", inst_per_cycle);
	fprintf(f, "\n\n");

	/* Report for compute units */
	EVG_GPU_FOREACH_COMPUTE_UNIT(compute_unit_id)
	{
		compute_unit = evg_gpu->compute_units[compute_unit_id];
		local_mod = compute_unit->local_memory;

		/* Derived per-engine statistics for this compute unit */
		inst_per_cycle = compute_unit->cycle ? (double) compute_unit->inst_count
			/ compute_unit->cycle : 0.0;
		cf_inst_per_cycle = compute_unit->cycle ? (double) compute_unit->cf_engine.inst_count
			/ compute_unit->cycle : 0.0;
		alu_inst_per_cycle = compute_unit->alu_engine.cycle ? (double) compute_unit->alu_engine.inst_count
			/ compute_unit->alu_engine.cycle : 0.0;
		tex_inst_per_cycle = compute_unit->tex_engine.cycle ? (double) compute_unit->tex_engine.inst_count
			/ compute_unit->tex_engine.cycle : 0.0;
		/* Coalesced accesses = total accesses minus those that actually
		 * reached the local memory module */
		coalesced_reads = local_mod->reads - local_mod->effective_reads;
		coalesced_writes = local_mod->writes - local_mod->effective_writes;
		snprintf(vliw_occupancy, MAX_STRING_SIZE, "%lld %lld %lld %lld %lld",
			compute_unit->alu_engine.vliw_slots[0],
			compute_unit->alu_engine.vliw_slots[1],
			compute_unit->alu_engine.vliw_slots[2],
			compute_unit->alu_engine.vliw_slots[3],
			compute_unit->alu_engine.vliw_slots[4]);

		fprintf(f, "[ ComputeUnit %d ]\n\n", compute_unit_id);

		fprintf(f, "WorkGroupCount = %lld\n", compute_unit->mapped_work_groups);
		fprintf(f, "Instructions = %lld\n", compute_unit->inst_count);
		fprintf(f, "Cycles = %lld\n", compute_unit->cycle);
		fprintf(f, "InstructionsPerCycle = %.4g\n", inst_per_cycle);
		fprintf(f, "\n");

		fprintf(f, "CFEngine.Instructions = %lld\n", compute_unit->cf_engine.inst_count);
		fprintf(f, "CFEngine.InstructionsPerCycle = %.4g\n", cf_inst_per_cycle);
		fprintf(f, "CFEngine.ALUClauseTriggers = %lld\n", compute_unit->cf_engine.alu_clause_trigger_count);
		fprintf(f, "CFEngine.TEXClauseTriggers = %lld\n", compute_unit->cf_engine.tex_clause_trigger_count);
		fprintf(f, "CFEngine.GlobalMemWrites = %lld\n", compute_unit->cf_engine.global_mem_write_count);
		fprintf(f, "\n");

		fprintf(f, "ALUEngine.WavefrontCount = %lld\n", compute_unit->alu_engine.wavefront_count);
		fprintf(f, "ALUEngine.Instructions = %lld\n", compute_unit->alu_engine.inst_count);
		fprintf(f, "ALUEngine.InstructionSlots = %lld\n", compute_unit->alu_engine.inst_slot_count);
		fprintf(f, "ALUEngine.LocalMemorySlots = %lld\n", compute_unit->alu_engine.local_mem_slot_count);
		fprintf(f, "ALUEngine.VLIWOccupancy = %s\n", vliw_occupancy);
		fprintf(f, "ALUEngine.Cycles = %lld\n", compute_unit->alu_engine.cycle);
		fprintf(f, "ALUEngine.InstructionsPerCycle = %.4g\n", alu_inst_per_cycle);
		fprintf(f, "\n");

		fprintf(f, "TEXEngine.WavefrontCount = %lld\n", compute_unit->tex_engine.wavefront_count);
		fprintf(f, "TEXEngine.Instructions = %lld\n", compute_unit->tex_engine.inst_count);
		fprintf(f, "TEXEngine.Cycles = %lld\n", compute_unit->tex_engine.cycle);
		fprintf(f, "TEXEngine.InstructionsPerCycle = %.4g\n", tex_inst_per_cycle);
		fprintf(f, "\n");

		fprintf(f, "LocalMemory.Accesses = %lld\n", local_mod->reads + local_mod->writes);
		fprintf(f, "LocalMemory.Reads = %lld\n", local_mod->reads);
		fprintf(f, "LocalMemory.EffectiveReads = %lld\n", local_mod->effective_reads);
		fprintf(f, "LocalMemory.CoalescedReads = %lld\n", coalesced_reads);
		fprintf(f, "LocalMemory.Writes = %lld\n", local_mod->writes);
		fprintf(f, "LocalMemory.EffectiveWrites = %lld\n", local_mod->effective_writes);
		fprintf(f, "LocalMemory.CoalescedWrites = %lld\n", coalesced_writes);
		fprintf(f, "\n\n");
	}
}