/* Emulate the system call currently requested by context 'ctx'.
 * The syscall number is taken from register $v0 (regs_R[2]) offset by the
 * MIPS Linux syscall base '__NR_Linux'; the call is dispatched through the
 * 'mips_sys_call_func' table and the result written back to $v0. */
void mips_sys_call(struct mips_ctx_t *ctx)
{
	struct mips_regs_t *regs = ctx->regs;

	int code;
	int err;

	/* System call code: $v0 holds '__NR_Linux + code', so subtract the base.
	 * Valid codes are 1 .. mips_sys_code_count-1. */
	code = regs->regs_R[2] - __NR_Linux;
	if (code < 1 || code >= mips_sys_code_count)
		fatal("%s: invalid system call code (%d)", __FUNCTION__, code);

	/* Statistics: per-syscall frequency counter */
	mips_sys_call_freq[code]++;

	/* Debug */
	mips_sys_debug("'%s' (code %d, inst %lld, pid %d)\n",
		mips_sys_call_name[code], code,
		asEmu(mips_emu)->instructions, ctx->pid);
	mips_isa_call_debug("system call '%s' (code %d, inst %lld, pid %d)\n",
		mips_sys_call_name[code], code,
		asEmu(mips_emu)->instructions, ctx->pid);

	/* Perform system call through the per-code dispatch table */
	err = mips_sys_call_func[code](ctx);

	/* Set return value in $v0 (regs_R[2]), except for 'sigreturn' system
	 * call. Also, if the context got suspended, the wake up routine will
	 * set the return value. */
	if (code != mips_sys_code_sigreturn && !mips_ctx_get_status(ctx, mips_ctx_suspended))
		regs->regs_R[2] = err;

	/* Debug: a negative 'err' in [-SIM_ERRNO_MAX, -1] encodes an errno,
	 * decoded through 'mips_sys_error_code_map' */
	mips_sys_debug(" ret=(%d, 0x%x)", err, err);
	if (err < 0 && err >= -SIM_ERRNO_MAX)
		mips_sys_debug(", errno=%s)",
			str_map_value(&mips_sys_error_code_map, -err));
	mips_sys_debug("\n");
}
/* Class constructor for the MIPS emulator object. Initializes the parent
 * 'Emu' class, sets up instance state, and installs virtual functions. */
void MIPSEmuCreate(MIPSEmu *self)
{
	Emu *emu = asEmu(self);

	/* Construct parent class first */
	EmuCreate(emu, "MIPS");

	/* Instance state: context PIDs are handed out starting at 100 */
	self->current_pid = 100;
	pthread_mutex_init(&self->process_events_mutex, NULL);

	/* Install virtual functions */
	asObject(self)->Dump = MIPSEmuDump;
	emu->DumpSummary = MIPSEmuDumpSummary;
	emu->Run = MIPSEmuRun;
}
/* Run fast-forward simulation: functional emulation only, no timing, until
 * 'x86_cpu_fast_forward_count' instructions have been emulated or some
 * simulation end condition is raised. */
void X86CpuFastForward(X86Cpu *self)
{
	Emu *emu = asEmu(self->emu);

	/* Iterate the x86 functional emulation loop. 'esim_finish' is a pure
	 * global flag, so checking it first short-circuits identically. */
	while (!esim_finish && emu->instructions < x86_cpu_fast_forward_count)
		X86EmuRun(emu);

	/* Record number of instructions in fast-forward execution. */
	self->num_fast_forward_inst = emu->instructions;

	/* Output warning if simulation finished during fast-forward execution. */
	if (esim_finish)
		warning("x86 fast-forwarding finished simulation.\n%s",
			x86_cpu_err_fast_forward);
}
/* Execute one instruction for warp 'self': fetch 64 bits from the warp's
 * instruction buffer at the current PC, decode it, and run the emulation
 * handler for every thread in the warp. Finished warps are moved from the
 * thread-block's running list to its finished list. */
void KplWarpExecute(KplWarp *self)
{
	KplEmu *emu;
	KplGrid *grid;
	KplThreadBlock *thread_block;
	KplThread *thread;
	struct KplInstWrap *inst;
	KplInstBytes inst_bytes;
	KplInstOpcode inst_op;
	int thread_id;

	/* Get current arch, grid, and thread-block */
	thread_block = self->thread_block;
	grid = thread_block->grid;
	emu = grid->emu;

	/* Get instruction: the 64-bit word is split so that the high half
	 * lands in as_uint[0] and the low half in as_uint[1] */
	inst_bytes.as_uint[0] = self->inst_buffer[self->pc / self->inst_size] >> 32;
	inst_bytes.as_uint[1] = self->inst_buffer[self->pc / self->inst_size];
	kpl_isa_debug("%s:%d: warp[%d] executes instruction [0x%x] 0x%016llx\n",
		__FILE__, __LINE__, self->id, self->pc, inst_bytes.as_dword);

	/* Decode instruction */
	inst = self->inst;
	KplInstWrapDecode(inst, self->pc, &inst_bytes);

	/* Execute instruction: dispatch through the emulator's per-opcode
	 * handler table, once per thread in the warp */
	inst_op = KplInstWrapGetOpcode(inst);
	if (!inst_op)
		fatal("%s:%d: unrecognized instruction (%08x %08x)",
			__FILE__, __LINE__, inst_bytes.as_uint[0],
			inst_bytes.as_uint[1]);
	for (thread_id = 0; thread_id < self->thread_count; ++thread_id)
	{
		thread = self->threads[thread_id];
		emu->inst_func[inst_op](thread, inst);
	}

	/* Finish: move warp to the thread-block's finished list and return
	 * without counting stats for this (already executed) instruction */
	if (self->finished)
	{
		assert(list_index_of(thread_block->running_warps, self) != -1);
		assert(list_index_of(thread_block->finished_warps, self) == -1);
		list_remove(thread_block->running_warps, self);
		list_add(thread_block->finished_warps, self);
		return;
	}

	/* Update PC */
	/* NOTE(review): the PC update below is commented out, so this function
	 * never advances 'self->pc' itself - presumably the per-instruction
	 * handlers update the PC instead. Confirm, otherwise the warp would
	 * re-execute the same instruction forever. */
	/* if (KplInstWrapGetCategory(inst) != KplInstCategoryCtrl)
		self->pc += self->inst_size;
	else
		self->pc = self->target_pc; */

	/* Stats */
	asEmu(emu)->instructions++;
	self->inst_count++;
}
/* Virtual function 'Run' for the MIPS emulator: run one iteration of the
 * emulation loop, executing one instruction for every running context.
 * Returns FALSE when there is nothing left to emulate, TRUE otherwise. */
int MIPSEmuRun(Emu *self)
{
	MIPSEmu *emu = asMIPSEmu(self);

	struct mips_ctx_t *ctx;

	/* Stop if there is no context running */
	if (emu->finished_list_count >= emu->context_list_count)
		return FALSE;

	/* Stop if maximum number of CPU instructions exceeded.
	 * Consistency fix: read the instruction counter from 'self' (the
	 * instance this virtual function was invoked on) instead of the
	 * global 'mips_emu', matching how the context lists are accessed. */
	if (mips_emu_max_inst && self->instructions >= mips_emu_max_inst)
		esim_finish = esim_finish_mips_max_inst;

	/* Stop if any previous reason met */
	if (esim_finish)
		return TRUE;

	/* Run an instruction from every running process.
	 * NOTE(review): 'ctx->running_list_next' is read after executing the
	 * instruction; this assumes execution never frees or relinks 'ctx'
	 * in a way that invalidates that pointer - confirm. */
	for (ctx = emu->running_list_head; ctx; ctx = ctx->running_list_next)
		mips_ctx_execute(ctx);

	/* Free finished contexts */
	while (emu->finished_list_head)
		mips_ctx_free(emu->finished_list_head);

	/* Still running */
	return TRUE;
}
/* Class constructor for the Southern Islands emulator object. Sets up the
 * parent class, video/global memory, work-group lists, and virtual
 * functions. */
void SIEmuCreate(SIEmu *self)
{
	Emu *emu = asEmu(self);

	/* Construct parent class first */
	EmuCreate(emu, "SouthernIslands");

	/* Video memory, accessed in unsafe mode (no permission checks) */
	self->video_mem = mem_create();
	self->video_mem->safe = 0;
	self->video_mem_top = 0;

	/* Work-group scheduling lists */
	self->waiting_work_groups = list_create();
	self->running_work_groups = list_create();

	/* Set global memory to video memory by default */
	self->global_mem = self->video_mem;

	/* Install virtual functions */
	asObject(self)->Dump = SIEmuDump;
	emu->DumpSummary = SIEmuDumpSummary;
	emu->Run = SIEmuRun;
}
/* Virtual function 'DumpSummary' for the Evergreen GPU timing simulator:
 * print the parent class summary followed by the IPC (emulated
 * instructions per timing cycle) into file 'f'. */
void EvgGpuDumpSummary(Timing *self, FILE *f)
{
	double inst_per_cycle;

	/* Call parent */
	TimingDumpSummary(asTiming(self), f);

	/* Additional statistics.
	 * Consistency fix: read the cycle counter from parameter 'self'
	 * instead of the global 'evg_gpu' (same object, but the function
	 * should operate on the instance it was invoked on). */
	inst_per_cycle = self->cycle ?
		(double) asEmu(evg_emu)->instructions / self->cycle : 0.0;
	fprintf(f, "IPC = %.4g\n", inst_per_cycle);
}
/* Class constructor for the Fermi emulator object. Initializes the parent
 * class, grid lists, global and constant memories, and virtual functions. */
void FrmEmuCreate(FrmEmu *self)
{
	/* Parent */
	EmuCreate(asEmu(self), "Fermi");

	/* Grid state lists */
	self->grids = list_create();
	self->pending_grids = list_create();
	self->running_grids = list_create();
	self->finished_grids = list_create();

	/* Global memory, accessed in unsafe mode (no permission checks) */
	self->global_mem = mem_create();
	self->global_mem->safe = 0;
	self->global_mem_top = 0;
	/* Fix: '1 << 31' left-shifts into the sign bit of a signed int, which
	 * is undefined behavior; shift an unsigned operand to get 2^31. */
	self->total_global_mem_size = 1u << 31;  /* 2GB */
	self->free_global_mem_size = 1u << 31;  /* 2GB */

	/* Constant memory */
	self->const_mem = mem_create();
	self->const_mem->safe = 0;

	/* Virtual functions */
	asObject(self)->Dump = FrmEmuDump;
	asEmu(self)->DumpSummary = FrmEmuDumpSummary;
	asEmu(self)->Run = FrmEmuRun;
}
/* Fetch, decode, and emulate one x86 instruction for context 'self',
 * honoring speculative-mode memory semantics. Increments the emulator's
 * global instruction counter. */
void X86ContextExecute(X86Context *self)
{
	X86Emu *emu = self->emu;

	struct x86_regs_t *regs = self->regs;
	struct mem_t *mem = self->mem;

	/* Improvement: name the magic number - the maximum number of bytes
	 * fetched for a single instruction (previously a bare '20' repeated
	 * three times). */
	enum { X86_CONTEXT_INST_BUFFER_SIZE = 20 };

	unsigned char buffer[X86_CONTEXT_INST_BUFFER_SIZE];
	unsigned char *buffer_ptr;
	int spec_mode;

	/* Memory permissions should not be checked if the context is executing in
	 * speculative mode. This will prevent guest segmentation faults to occur. */
	spec_mode = X86ContextGetState(self, X86ContextSpecMode);
	mem->safe = spec_mode ? 0 : mem_safe_mode;

	/* Read instruction from memory. Memory should be accessed here in unsafe mode
	 * (i.e., allowing segmentation faults) if executing speculatively. */
	buffer_ptr = mem_get_buffer(mem, regs->eip,
		X86_CONTEXT_INST_BUFFER_SIZE, mem_access_exec);
	if (!buffer_ptr)
	{
		/* Disable safe mode. If a part of the read bytes does not belong
		 * to the actual instruction, and they lie on a page with no
		 * permissions, this would generate an undesired protection
		 * fault. */
		mem->safe = 0;
		buffer_ptr = buffer;
		mem_access(mem, regs->eip, X86_CONTEXT_INST_BUFFER_SIZE,
			buffer_ptr, mem_access_exec);
	}
	mem->safe = mem_safe_mode;

	/* Disassemble */
	X86InstDecode(&self->inst, regs->eip, buffer_ptr);
	if (self->inst.opcode == X86InstOpcodeInvalid && !spec_mode)
		fatal("0x%x: not supported x86 instruction (%02x %02x %02x %02x...)",
			regs->eip, buffer_ptr[0], buffer_ptr[1],
			buffer_ptr[2], buffer_ptr[3]);

	/* Stop if instruction matches last instruction bytes */
	if (x86_emu_last_inst_size &&
		x86_emu_last_inst_size == self->inst.size &&
		!memcmp(x86_emu_last_inst_bytes, buffer_ptr, x86_emu_last_inst_size))
		esim_finish = esim_finish_x86_last_inst;

	/* Execute instruction */
	X86ContextExecuteInst(self);

	/* Statistics */
	asEmu(emu)->instructions++;
}
/* Virtual function 'Run' for the x86 timing simulator: advance the timing
 * model by one cycle. Returns FALSE when no context is left to simulate,
 * TRUE otherwise. The order of the stop-condition checks below matters:
 * end reasons are latched into 'esim_finish' before the cycle advances. */
int X86CpuRun(Timing *self)
{
	X86Cpu *cpu = asX86Cpu(self);
	X86Emu *emu = cpu->emu;

	/* Stop if no context is running */
	if (emu->finished_list_count >= emu->context_list_count)
		return FALSE;

	/* Fast-forward simulation: pure functional emulation until the
	 * requested number of instructions has been reached */
	if (x86_cpu_fast_forward_count && asEmu(emu)->instructions < x86_cpu_fast_forward_count)
		X86CpuFastForward(cpu);

	/* Stop if maximum number of CPU instructions exceeded. Fast-forwarded
	 * instructions are discounted from the committed-instruction budget. */
	if (x86_emu_max_inst && cpu->num_committed_inst >= x86_emu_max_inst - x86_cpu_fast_forward_count)
		esim_finish = esim_finish_x86_max_inst;

	/* Stop if maximum number of cycles exceeded */
	if (x86_emu_max_cycles && self->cycle >= x86_emu_max_cycles)
		esim_finish = esim_finish_x86_max_cycles;

	/* Stop if any previous reason met */
	if (esim_finish)
		return TRUE;

	/* One more cycle of x86 timing simulation */
	self->cycle++;

	/* Empty uop trace list. This dumps the last trace line for instructions
	 * that were freed in the previous simulation cycle. */
	X86CpuEmptyTraceList(cpu);

	/* Processor stages */
	X86CpuRunStages(cpu);

	/* Process host threads generating events */
	X86EmuProcessEvents(emu);

	/* Still simulating */
	return TRUE;
}
/* Class destructor for an x86 context. Forces the context into the
 * 'finished' state if needed, unlinks it from all emulator lists, and
 * releases private and shared (reference-counted) structures. The order
 * below matters: finishing first removes all external references. */
void X86ContextDestroy(X86Context *self)
{
	X86Emu *emu = self->emu;

	/* If context is not finished/zombie, finish it first.
	 * This removes all references to current freed context. */
	if (!X86ContextGetState(self, X86ContextFinished | X86ContextZombie))
		X86ContextFinish(self, 0);

	/* Remove context from finished contexts list. This should
	 * be the only list the context is in right now. */
	assert(!DOUBLE_LINKED_LIST_MEMBER(emu, running, self));
	assert(!DOUBLE_LINKED_LIST_MEMBER(emu, suspended, self));
	assert(!DOUBLE_LINKED_LIST_MEMBER(emu, zombie, self));
	assert(DOUBLE_LINKED_LIST_MEMBER(emu, finished, self));
	DOUBLE_LINKED_LIST_REMOVE(emu, finished, self);

	/* Free private structures (owned exclusively by this context) */
	x86_regs_free(self->regs);
	x86_regs_free(self->backup_regs);
	x86_signal_mask_table_free(self->signal_mask_table);
	spec_mem_free(self->spec_mem);
	bit_map_free(self->affinity);

	/* Unlink shared structures (reference-counted; freed when the last
	 * context sharing them is destroyed) */
	x86_loader_unlink(self->loader);
	x86_signal_handler_table_unlink(self->signal_handler_table);
	x86_file_desc_table_unlink(self->file_desc_table);
	mem_unlink(self->mem);

	/* Remove context from contexts list and free */
	DOUBLE_LINKED_LIST_REMOVE(emu, context, self);
	X86ContextDebug("inst %lld: context %d freed\n",
		asEmu(emu)->instructions, self->pid);

	/* Static instruction */
	delete_static(&self->inst);
}
/* FIXME - merge with ctx_execute */
/* Emulate the instruction already decoded into 'ctx->inst': advance the
 * delayed-branch PC pipeline, dispatch through the per-opcode handler
 * table, and update per-opcode statistics. */
void mips_isa_execute_inst(struct mips_ctx_t *ctx)
{
	// struct mips_regs_t *regs = ctx->regs;

	/* Shift the PC pipeline: 'n_next_ip' becomes 'next_ip' (this models
	 * the MIPS branch delay slot), then advance by one instruction */
	ctx->next_ip = ctx->n_next_ip;
	ctx->n_next_ip += 4;

	/* Debug */
	if (debug_status(mips_isa_inst_debug_category))
	{
		mips_isa_inst_debug("%d %8lld %x: ", ctx->pid,
			asEmu(mips_emu)->instructions, ctx->regs->pc);
		mips_inst_debug_dump(&ctx->inst,
			debug_file(mips_isa_inst_debug_category));
	}

	/* Call instruction emulation function.
	 * NOTE(review): opcode 0 is silently skipped here but still counted
	 * in 'mips_inst_freq' below - confirm this is intended. */
	// regs->pc = regs->pc + ctx->inst.info->size;
	if (ctx->inst.info->opcode)
		mips_isa_inst_func[ctx->inst.info->opcode](ctx);

	/* Statistics */
	mips_inst_freq[ctx->inst.info->opcode]++;

	/* Debug */
	mips_isa_inst_debug("\n");
	// if (debug_status(mips_isa_call_debug_category))
	//	mips_isa_debug_call(ctx);
}
/* Transition context 'self' to the state given by bit-mask 'state'.
 * The context is first removed from every state list, the state bits are
 * normalized (Finished/Zombie dominate; Running is derived from the
 * absence of Suspended/Finished/Zombie/Locked), and the context is then
 * re-inserted into the lists matching its new state. Any state change
 * other than SpecMode raises the scheduler signal. */
static void X86ContextUpdateState(X86Context *self, X86ContextState state)
{
	X86Emu *emu = self->emu;

	X86ContextState status_diff;
	char state_str[MAX_STRING_SIZE];

	/* Remove contexts from the following lists:
	 *   running, suspended, zombie */
	if (DOUBLE_LINKED_LIST_MEMBER(emu, running, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, running, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, suspended, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, suspended, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, zombie, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, zombie, self);
	if (DOUBLE_LINKED_LIST_MEMBER(emu, finished, self))
		DOUBLE_LINKED_LIST_REMOVE(emu, finished, self);

	/* If the difference between the old and new state lies in other
	 * states other than 'x86_ctx_specmode', a reschedule is marked. */
	status_diff = self->state ^ state;
	if (status_diff & ~X86ContextSpecMode)
		emu->schedule_signal = 1;

	/* Update state. Finished/Zombie override every other bit except
	 * Alloc and Mapped, which are preserved. */
	self->state = state;
	if (self->state & X86ContextFinished)
		self->state = X86ContextFinished
			| (state & X86ContextAlloc)
			| (state & X86ContextMapped);
	if (self->state & X86ContextZombie)
		self->state = X86ContextZombie
			| (state & X86ContextAlloc)
			| (state & X86ContextMapped);

	/* Running is a derived bit: set only when no blocking state is
	 * present */
	if (!(self->state & X86ContextSuspended) &&
		!(self->state & X86ContextFinished) &&
		!(self->state & X86ContextZombie) &&
		!(self->state & X86ContextLocked))
		self->state |= X86ContextRunning;
	else
		self->state &= ~X86ContextRunning;

	/* Insert context into the corresponding lists. */
	if (self->state & X86ContextRunning)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, running, self);
	if (self->state & X86ContextZombie)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, zombie, self);
	if (self->state & X86ContextFinished)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, finished, self);
	if (self->state & X86ContextSuspended)
		DOUBLE_LINKED_LIST_INSERT_HEAD(emu, suspended, self);

	/* Dump new state (ignore 'x86_ctx_specmode' state, it's too frequent) */
	if (debug_status(x86_context_debug_category) &&
		(status_diff & ~X86ContextSpecMode))
	{
		str_map_flags(&x86_context_state_map, self->state,
			state_str, sizeof state_str);
		X86ContextDebug("inst %lld: ctx %d changed state to %s\n",
			asEmu(emu)->instructions, self->pid, state_str);
	}

	/* Start/stop x86 timer depending on whether there are any contexts
	 * currently running. */
	if (emu->running_list_count)
		m2s_timer_start(asEmu(emu)->timer);
	else
		m2s_timer_stop(asEmu(emu)->timer);
}
/* Dump a full statistics report for the x86 timing simulator into file
 * 'f': CPU configuration, global processor statistics, then one section
 * per core, each followed by one section per hardware thread. */
void X86CpuDumpReport(X86Cpu *self, FILE *f)
{
	X86Emu *emu = self->emu;
	X86Core *core;
	X86Thread *thread;

	long long now;

	int i;
	int j;

	/* Get CPU timer value */
	now = m2s_timer_get_value(asEmu(emu)->timer);

	/* Dump CPU configuration */
	fprintf(f, ";\n; CPU Configuration\n;\n\n");
	X86DumpCpuConfig(f);

	/* Report for the complete processor */
	fprintf(f, ";\n; Simulation Statistics\n;\n\n");
	fprintf(f, "; Global statistics\n");
	fprintf(f, "[ Global ]\n\n");
	fprintf(f, "Cycles = %lld\n", asTiming(self)->cycle);
	fprintf(f, "Time = %.2f\n", (double)now / 1000000);
	fprintf(f, "CyclesPerSecond = %.0f\n",
		now ? (double)asTiming(self)->cycle / now * 1000000 : 0.0);
	/* Fix: '%lu' requires 'unsigned long'; the former '(long)' casts made
	 * the format specifier and argument type disagree (undefined
	 * behavior per the C standard) */
	fprintf(f, "MemoryUsed = %lu\n", (unsigned long)mem_mapped_space);
	fprintf(f, "MemoryUsedMax = %lu\n", (unsigned long)mem_max_mapped_space);
	fprintf(f, "\n");

	/* Dispatch stage */
	fprintf(f, "; Dispatch stage\n");
	X86CpuDumpUopReport(self, f, self->num_dispatched_uinst_array,
		"Dispatch", x86_cpu_dispatch_width);

	/* Issue stage */
	fprintf(f, "; Issue stage\n");
	X86CpuDumpUopReport(self, f, self->num_issued_uinst_array,
		"Issue", x86_cpu_issue_width);

	/* Commit stage */
	fprintf(f, "; Commit stage\n");
	X86CpuDumpUopReport(self, f, self->num_committed_uinst_array,
		"Commit", x86_cpu_commit_width);

	/* Committed branches */
	fprintf(f, "; Committed branches\n");
	fprintf(f, "; Branches - Number of committed control uops\n");
	fprintf(f, "; Squashed - Number of mispredicted uops squashed from the ROB\n");
	fprintf(f, "; Mispred - Number of mispredicted branches in the correct path\n");
	fprintf(f, "; PredAcc - Prediction accuracy\n");
	fprintf(f, "Commit.Branches = %lld\n", self->num_branch_uinst);
	fprintf(f, "Commit.Squashed = %lld\n", self->num_squashed_uinst);
	fprintf(f, "Commit.Mispred = %lld\n", self->num_mispred_branch_uinst);
	fprintf(f, "Commit.PredAcc = %.4g\n",
		self->num_branch_uinst ?
		(double)(self->num_branch_uinst - self->num_mispred_branch_uinst) /
		self->num_branch_uinst : 0.0);
	fprintf(f, "\n");

	/* Report for each core */
	for (i = 0; i < x86_cpu_num_cores; i++)
	{
		/* Core */
		core = self->cores[i];
		fprintf(f, "\n; Statistics for core %d\n", core->id);
		fprintf(f, "[ c%d ]\n\n", core->id);

		/* Functional units */
		X86CoreDumpFunctionalUnitsReport(core, f);

		/* Dispatch slots */
		if (x86_cpu_dispatch_kind == x86_cpu_dispatch_kind_timeslice)
		{
			fprintf(f, "; Dispatch slots usage (sum = cycles * dispatch width)\n");
			fprintf(f, "; used - dispatch slot was used by a non-spec uop\n");
			fprintf(f, "; spec - used by a mispeculated uop\n");
			fprintf(f, "; ctx - no context allocated to thread\n");
			fprintf(f, "; uopq,rob,iq,lsq,rename - no space in structure\n");
			DUMP_DISPATCH_STAT(used);
			DUMP_DISPATCH_STAT(spec);
			DUMP_DISPATCH_STAT(uop_queue);
			DUMP_DISPATCH_STAT(rob);
			DUMP_DISPATCH_STAT(iq);
			DUMP_DISPATCH_STAT(lsq);
			DUMP_DISPATCH_STAT(rename);
			DUMP_DISPATCH_STAT(ctx);
			fprintf(f, "\n");
		}

		/* Dispatch stage */
		fprintf(f, "; Dispatch stage\n");
		X86CpuDumpUopReport(self, f, core->num_dispatched_uinst_array,
			"Dispatch", x86_cpu_dispatch_width);

		/* Issue stage */
		fprintf(f, "; Issue stage\n");
		X86CpuDumpUopReport(self, f, core->num_issued_uinst_array,
			"Issue", x86_cpu_issue_width);

		/* Commit stage */
		fprintf(f, "; Commit stage\n");
		X86CpuDumpUopReport(self, f, core->num_committed_uinst_array,
			"Commit", x86_cpu_commit_width);

		/* Committed branches */
		fprintf(f, "; Committed branches\n");
		fprintf(f, "Commit.Branches = %lld\n", core->num_branch_uinst);
		fprintf(f, "Commit.Squashed = %lld\n", core->num_squashed_uinst);
		fprintf(f, "Commit.Mispred = %lld\n", core->num_mispred_branch_uinst);
		fprintf(f, "Commit.PredAcc = %.4g\n",
			core->num_branch_uinst ?
			(double)(core->num_branch_uinst - core->num_mispred_branch_uinst) /
			core->num_branch_uinst : 0.0);
		fprintf(f, "\n");

		/* Occupancy stats */
		fprintf(f, "; Structure statistics (reorder buffer, instruction queue,\n");
		fprintf(f, "; load-store queue, and integer/floating-point register file)\n");
		fprintf(f, "; Size - Available size\n");
		fprintf(f, "; Occupancy - Average number of occupied entries\n");
		fprintf(f, "; Full - Number of cycles when the structure was full\n");
		fprintf(f, "; Reads, Writes - Accesses to the structure\n");
		if (x86_rob_kind == x86_rob_kind_shared)
			DUMP_CORE_STRUCT_STATS(ROB, rob);
		if (x86_iq_kind == x86_iq_kind_shared)
		{
			DUMP_CORE_STRUCT_STATS(IQ, iq);
			fprintf(f, "IQ.WakeupAccesses = %lld\n", core->iq_wakeup_accesses);
		}
		if (x86_lsq_kind == x86_lsq_kind_shared)
			DUMP_CORE_STRUCT_STATS(LSQ, lsq);
		if (x86_reg_file_kind == x86_reg_file_kind_shared)
		{
			DUMP_CORE_STRUCT_STATS(RF_Int, reg_file_int);
			DUMP_CORE_STRUCT_STATS(RF_Fp, reg_file_fp);
		}
		fprintf(f, "\n");

		/* Report for each thread */
		for (j = 0; j < x86_cpu_num_threads; j++)
		{
			thread = core->threads[j];
			fprintf(f, "\n; Statistics for core %d - thread %d\n",
				core->id, thread->id_in_core);
			fprintf(f, "[ %s ]\n\n", thread->name);

			/* Dispatch stage */
			fprintf(f, "; Dispatch stage\n");
			X86CpuDumpUopReport(self, f, thread->num_dispatched_uinst_array,
				"Dispatch", x86_cpu_dispatch_width);

			/* Issue stage */
			fprintf(f, "; Issue stage\n");
			X86CpuDumpUopReport(self, f, thread->num_issued_uinst_array,
				"Issue", x86_cpu_issue_width);

			/* Commit stage */
			fprintf(f, "; Commit stage\n");
			X86CpuDumpUopReport(self, f, thread->num_committed_uinst_array,
				"Commit", x86_cpu_commit_width);

			/* Committed branches */
			fprintf(f, "; Committed branches\n");
			fprintf(f, "Commit.Branches = %lld\n", thread->num_branch_uinst);
			fprintf(f, "Commit.Squashed = %lld\n", thread->num_squashed_uinst);
			fprintf(f, "Commit.Mispred = %lld\n", thread->num_mispred_branch_uinst);
			fprintf(f, "Commit.PredAcc = %.4g\n",
				thread->num_branch_uinst ?
				(double)(thread->num_branch_uinst - thread->num_mispred_branch_uinst) /
				thread->num_branch_uinst : 0.0);
			fprintf(f, "\n");

			/* Occupancy stats */
			fprintf(f, "; Structure statistics (reorder buffer, instruction queue, "
				"load-store queue,\n");
			fprintf(f, "; integer/floating-point register file, and renaming table)\n");
			if (x86_rob_kind == x86_rob_kind_private)
				DUMP_THREAD_STRUCT_STATS(ROB, rob);
			if (x86_iq_kind == x86_iq_kind_private)
			{
				DUMP_THREAD_STRUCT_STATS(IQ, iq);
				fprintf(f, "IQ.WakeupAccesses = %lld\n", thread->iq_wakeup_accesses);
			}
			if (x86_lsq_kind == x86_lsq_kind_private)
				DUMP_THREAD_STRUCT_STATS(LSQ, lsq);
			if (x86_reg_file_kind == x86_reg_file_kind_private)
			{
				DUMP_THREAD_STRUCT_STATS(RF_Int, reg_file_int);
				DUMP_THREAD_STRUCT_STATS(RF_Fp, reg_file_fp);
			}
			fprintf(f, "RAT.IntReads = %lld\n", thread->rat_int_reads);
			fprintf(f, "RAT.IntWrites = %lld\n", thread->rat_int_writes);
			fprintf(f, "RAT.FpReads = %lld\n", thread->rat_fp_reads);
			fprintf(f, "RAT.FpWrites = %lld\n", thread->rat_fp_writes);
			fprintf(f, "BTB.Reads = %lld\n", thread->btb_reads);
			fprintf(f, "BTB.Writes = %lld\n", thread->btb_writes);
			fprintf(f, "\n");

			/* Trace cache stats */
			if (thread->trace_cache)
				X86ThreadDumpTraceCacheReport(thread, f);
		}
	}
}
/* Virtual function 'Run' for the Evergreen GPU timing simulator: advance
 * the GPU by one cycle. Maps pending ND-Ranges, schedules work-groups onto
 * ready compute units, checks stop conditions, runs every busy compute
 * unit for one cycle, and frees the ND-Range when all units drain.
 * Returns FALSE when there is nothing to simulate, TRUE otherwise. */
int EvgGpuRun(Timing *self)
{
	EvgGpu *gpu = asEvgGpu(self);

	struct evg_ndrange_t *ndrange;

	struct evg_compute_unit_t *compute_unit;
	struct evg_compute_unit_t *compute_unit_next;

	/* For efficiency when no Evergreen emulation is selected, exit here
	 * if the list of existing ND-Ranges is empty. */
	if (!evg_emu->ndrange_list_count)
		return FALSE;

	/* Start one ND-Range in state 'pending' */
	while ((ndrange = evg_emu->pending_ndrange_list_head))
	{
		/* Currently not supported for more than 1 ND-Range */
		if (gpu->ndrange)
			fatal("%s: Evergreen GPU timing simulation not supported for multiple ND-Ranges",
				__FUNCTION__);

		/* Set ND-Range status to 'running' */
		evg_ndrange_clear_status(ndrange, evg_ndrange_pending);
		evg_ndrange_set_status(ndrange, evg_ndrange_running);

		/* Trace */
		evg_trace("evg.new_ndrange "
			"id=%d "
			"wg_first=%d "
			"wg_count=%d\n",
			ndrange->id,
			ndrange->work_group_id_first,
			ndrange->work_group_count);

		/* Map ND-Range to GPU */
		evg_gpu_map_ndrange(ndrange);
		evg_calc_plot();
	}

	/* Mapped ND-Range */
	ndrange = gpu->ndrange;
	assert(ndrange);

	/* Allocate work-groups to compute units */
	while (gpu->ready_list_head && ndrange->pending_list_head)
		evg_compute_unit_map_work_group(gpu->ready_list_head,
			ndrange->pending_list_head);

	/* One more cycle */
	asTiming(evg_gpu)->cycle++;

	/* Stop if maximum number of GPU cycles exceeded */
	if (evg_emu_max_cycles && asTiming(evg_gpu)->cycle >= evg_emu_max_cycles)
		esim_finish = esim_finish_evg_max_cycles;

	/* Stop if maximum number of GPU instructions exceeded */
	if (evg_emu_max_inst && asEmu(evg_emu)->instructions >= evg_emu_max_inst)
		esim_finish = esim_finish_evg_max_inst;

	/* Stop if there was a simulation stall: no compute unit has completed
	 * anything for a million cycles */
	if (asTiming(evg_gpu)->cycle - gpu->last_complete_cycle > 1000000)
	{
		warning("Evergreen GPU simulation stalled.\n%s", evg_err_stall);
		esim_finish = esim_finish_stall;
	}

	/* Stop if any reason met */
	if (esim_finish)
		return TRUE;

	/* Free instructions in trash */
	evg_gpu_uop_trash_empty();

	/* Run one loop iteration on each busy compute unit */
	for (compute_unit = gpu->busy_list_head; compute_unit;
		compute_unit = compute_unit_next)
	{
		/* Store next busy compute unit, since this can change
		 * during the compute unit simulation loop iteration. */
		compute_unit_next = compute_unit->busy_list_next;

		/* Run one cycle */
		evg_compute_unit_run(compute_unit);
	}

	/* GPU-REL: insert stack faults */
	evg_faults_insert();

	/* If ND-Range finished execution in all compute units, free it. */
	if (!gpu->busy_list_count)
	{
		/* Dump ND-Range report */
		evg_ndrange_dump(ndrange, evg_emu_report_file);

		/* Stop if maximum number of kernels reached */
		if (evg_emu_max_kernels && evg_emu->ndrange_count >= evg_emu_max_kernels)
			esim_finish = esim_finish_evg_max_kernels;

		/* Finalize and free ND-Range */
		assert(evg_ndrange_get_status(ndrange, evg_ndrange_finished));
		evg_gpu_uop_trash_empty();
		evg_gpu_unmap_ndrange();
		evg_ndrange_free(ndrange);
	}

	/* Still simulating */
	return TRUE;
}
void evg_gpu_dump_report(void) { struct evg_compute_unit_t *compute_unit; struct mod_t *local_mod; int compute_unit_id; FILE *f; double inst_per_cycle; double cf_inst_per_cycle; double alu_inst_per_cycle; double tex_inst_per_cycle; long long coalesced_reads; long long coalesced_writes; char vliw_occupancy[MAX_STRING_SIZE]; /* Open file */ f = file_open_for_write(evg_gpu_report_file_name); if (!f) return; /* Dump GPU configuration */ fprintf(f, ";\n; GPU Configuration\n;\n\n"); evg_config_dump(f); /* Report for device */ fprintf(f, ";\n; Simulation Statistics\n;\n\n"); inst_per_cycle = asTiming(evg_gpu)->cycle ? (double) asEmu(evg_emu)->instructions / asTiming(evg_gpu)->cycle : 0.0; fprintf(f, "[ Device ]\n\n"); fprintf(f, "NDRangeCount = %d\n", evg_emu->ndrange_count); fprintf(f, "Instructions = %lld\n", asEmu(evg_emu)->instructions); fprintf(f, "Cycles = %lld\n", asTiming(evg_gpu)->cycle); fprintf(f, "InstructionsPerCycle = %.4g\n", inst_per_cycle); fprintf(f, "\n\n"); /* Report for compute units */ EVG_GPU_FOREACH_COMPUTE_UNIT(compute_unit_id) { compute_unit = evg_gpu->compute_units[compute_unit_id]; local_mod = compute_unit->local_memory; inst_per_cycle = compute_unit->cycle ? (double) compute_unit->inst_count / compute_unit->cycle : 0.0; cf_inst_per_cycle = compute_unit->cycle ? (double) compute_unit->cf_engine.inst_count / compute_unit->cycle : 0.0; alu_inst_per_cycle = compute_unit->alu_engine.cycle ? (double) compute_unit->alu_engine.inst_count / compute_unit->alu_engine.cycle : 0.0; tex_inst_per_cycle = compute_unit->tex_engine.cycle ? 
(double) compute_unit->tex_engine.inst_count / compute_unit->tex_engine.cycle : 0.0; coalesced_reads = local_mod->reads - local_mod->effective_reads; coalesced_writes = local_mod->writes - local_mod->effective_writes; snprintf(vliw_occupancy, MAX_STRING_SIZE, "%lld %lld %lld %lld %lld", compute_unit->alu_engine.vliw_slots[0], compute_unit->alu_engine.vliw_slots[1], compute_unit->alu_engine.vliw_slots[2], compute_unit->alu_engine.vliw_slots[3], compute_unit->alu_engine.vliw_slots[4]); fprintf(f, "[ ComputeUnit %d ]\n\n", compute_unit_id); fprintf(f, "WorkGroupCount = %lld\n", compute_unit->mapped_work_groups); fprintf(f, "Instructions = %lld\n", compute_unit->inst_count); fprintf(f, "Cycles = %lld\n", compute_unit->cycle); fprintf(f, "InstructionsPerCycle = %.4g\n", inst_per_cycle); fprintf(f, "\n"); fprintf(f, "CFEngine.Instructions = %lld\n", compute_unit->cf_engine.inst_count); fprintf(f, "CFEngine.InstructionsPerCycle = %.4g\n", cf_inst_per_cycle); fprintf(f, "CFEngine.ALUClauseTriggers = %lld\n", compute_unit->cf_engine.alu_clause_trigger_count); fprintf(f, "CFEngine.TEXClauseTriggers = %lld\n", compute_unit->cf_engine.tex_clause_trigger_count); fprintf(f, "CFEngine.GlobalMemWrites = %lld\n", compute_unit->cf_engine.global_mem_write_count); fprintf(f, "\n"); fprintf(f, "ALUEngine.WavefrontCount = %lld\n", compute_unit->alu_engine.wavefront_count); fprintf(f, "ALUEngine.Instructions = %lld\n", compute_unit->alu_engine.inst_count); fprintf(f, "ALUEngine.InstructionSlots = %lld\n", compute_unit->alu_engine.inst_slot_count); fprintf(f, "ALUEngine.LocalMemorySlots = %lld\n", compute_unit->alu_engine.local_mem_slot_count); fprintf(f, "ALUEngine.VLIWOccupancy = %s\n", vliw_occupancy); fprintf(f, "ALUEngine.Cycles = %lld\n", compute_unit->alu_engine.cycle); fprintf(f, "ALUEngine.InstructionsPerCycle = %.4g\n", alu_inst_per_cycle); fprintf(f, "\n"); fprintf(f, "TEXEngine.WavefrontCount = %lld\n", compute_unit->tex_engine.wavefront_count); fprintf(f, 
"TEXEngine.Instructions = %lld\n", compute_unit->tex_engine.inst_count); fprintf(f, "TEXEngine.Cycles = %lld\n", compute_unit->tex_engine.cycle); fprintf(f, "TEXEngine.InstructionsPerCycle = %.4g\n", tex_inst_per_cycle); fprintf(f, "\n"); fprintf(f, "LocalMemory.Accesses = %lld\n", local_mod->reads + local_mod->writes); fprintf(f, "LocalMemory.Reads = %lld\n", local_mod->reads); fprintf(f, "LocalMemory.EffectiveReads = %lld\n", local_mod->effective_reads); fprintf(f, "LocalMemory.CoalescedReads = %lld\n", coalesced_reads); fprintf(f, "LocalMemory.Writes = %lld\n", local_mod->writes); fprintf(f, "LocalMemory.EffectiveWrites = %lld\n", local_mod->effective_writes); fprintf(f, "LocalMemory.CoalescedWrites = %lld\n", coalesced_writes); fprintf(f, "\n\n"); } }