static int X86ThreadDispatch(X86Thread *self, int quantum)
{
    X86Core *core = self->core;
    X86Cpu *cpu = self->cpu;

    struct x86_uop_t *uop;
    enum x86_dispatch_stall_t stall;

    while (quantum)
    {
        /* Check if we can dispatch */
        stall = X86ThreadCanDispatch(self);
        if (stall != x86_dispatch_stall_used)
        {
            core->dispatch_stall[stall] += quantum;
            break;
        }

        /* Get entry from uop queue */
        uop = list_remove_at(self->uop_queue, 0);
        assert(x86_uop_exists(uop));
        uop->in_uop_queue = 0;

        /* Rename */
        X86ThreadRenameUop(self, uop);

        /* Insert in ROB */
        X86CoreEnqueueInROB(core, uop);
        core->rob_writes++;
        self->rob_writes++;

        /* Non-memory instructions into the IQ */
        if (!(uop->flags & X86_UINST_MEM))
        {
            X86ThreadInsertInIQ(self, uop);
            core->iq_writes++;
            self->iq_writes++;
        }

        /* Memory instructions into the LSQ */
        if (uop->flags & X86_UINST_MEM)
        {
            X86ThreadInsertInLSQ(self, uop);
            core->lsq_writes++;
            self->lsq_writes++;
        }

        /* Statistics */
        core->dispatch_stall[uop->specmode ? x86_dispatch_stall_spec
            : x86_dispatch_stall_used]++;
        self->num_dispatched_uinst_array[uop->uinst->opcode]++;
        core->num_dispatched_uinst_array[uop->uinst->opcode]++;
        cpu->num_dispatched_uinst_array[uop->uinst->opcode]++;
        if (uop->trace_cache)
            self->trace_cache->num_dispatched_uinst++;

        /* Another instruction dispatched, update quantum. */
        quantum--;

        /* Trace */
        x86_trace("x86.inst id=%lld core=%d stg=\"di\"\n",
            uop->id_in_core, core->id);
    }

    return quantum;
}
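/* The per-thread dispatcher above is driven once per cycle by a core-level
 * loop that hands out the core's dispatch bandwidth: the quantum passed in
 * is the number of slots the thread may use, and the return value is the
 * number it left unused. The sketch below only illustrates that contract.
 * The round-robin policy, the 'threads' array access, and the use of
 * 'x86_cpu_dispatch_width' as the per-cycle budget are assumptions for the
 * sake of the example, not necessarily the simulator's actual scheduling
 * policy. */
static void X86CoreDispatchSketch(X86Core *core)
{
    int remaining = x86_cpu_dispatch_width;
    int i;

    /* Round-robin over hardware threads: each thread consumes as many of
     * the remaining dispatch slots as it can and returns the rest, so a
     * stalled thread's slots flow to the next thread. */
    for (i = 0; i < x86_cpu_num_threads && remaining; i++)
        remaining = X86ThreadDispatch(core->threads[i], remaining);
}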
void x86_uop_queue_recover(int core, int thread)
{
    struct list_t *uop_queue = X86_THREAD.uop_queue;
    struct x86_uop_t *uop;

    while (list_count(uop_queue))
    {
        uop = list_get(uop_queue, list_count(uop_queue) - 1);
        assert(uop->thread == thread);
        if (!uop->specmode)
            break;
        list_remove_at(uop_queue, list_count(uop_queue) - 1);
        uop->in_uop_queue = 0;

        /* Trace */
        if (x86_tracing())
        {
            x86_trace("x86.inst id=%lld core=%d stg=\"sq\"\n",
                uop->id_in_core, uop->core);
            x86_cpu_uop_trace_list_add(uop);
        }

        /* Free */
        x86_uop_free_if_not_queued(uop);
    }
}
void X86ThreadRecoverFetchQueue(X86Thread *self)
{
    X86Core *core = self->core;
    X86Cpu *cpu = self->cpu;

    struct list_t *fetchq = self->fetch_queue;
    struct x86_uop_t *uop;

    while (list_count(fetchq))
    {
        uop = list_get(fetchq, list_count(fetchq) - 1);
        assert(uop->thread == self);
        if (!uop->specmode)
            break;
        uop = X86ThreadRemoveFromFetchQueue(self, list_count(fetchq) - 1);

        /* Trace */
        if (x86_tracing())
        {
            x86_trace("x86.inst id=%lld core=%d stg=\"sq\"\n",
                uop->id_in_core, core->id);
            X86CpuAddToTraceList(cpu, uop);
        }

        /* Free */
        x86_uop_free_if_not_queued(uop);
    }
}
void x86_cpu_recover(int core, int thread)
{
    struct x86_uop_t *uop;

    /* Remove instructions of this thread in fetch_queue, uop_queue, iq,
     * sq, lq and event_queue. */
    x86_fetch_queue_recover(core, thread);
    x86_uop_queue_recover(core, thread);
    x86_iq_recover(core, thread);
    x86_lsq_recover(core, thread);
    x86_event_queue_recover(core, thread);

    /* Remove instructions from ROB, restoring the state of the
     * physical register file. */
    for (;;)
    {
        /* Get instruction */
        uop = x86_rob_tail(core, thread);
        if (!uop)
            break;

        /* If we already removed all speculative instructions,
         * the work is finished */
        assert(uop->core == core);
        assert(uop->thread == thread);
        if (!uop->specmode)
            break;

        /* Statistics */
        if (uop->fetch_trace_cache)
            X86_THREAD.trace_cache->squashed++;
        X86_THREAD.squashed++;
        X86_CORE.squashed++;
        x86_cpu->squashed++;

        /* Undo map */
        if (!uop->completed)
            x86_reg_file_write(uop);
        x86_reg_file_undo(uop);

        /* Trace */
        if (x86_tracing())
        {
            x86_trace("x86.inst id=%lld core=%d stg=\"sq\"\n",
                uop->id_in_core, uop->core);
            x86_cpu_uop_trace_list_add(uop);
        }

        /* Remove entry in ROB */
        x86_rob_remove_tail(core, thread);
    }

    /* If we actually fetched wrong instructions, recover the emulated
     * context. */
    if (x86_ctx_get_status(X86_THREAD.ctx, x86_ctx_spec_mode))
        x86_ctx_recover(X86_THREAD.ctx);

    /* Stall fetch and set eip to fetch. */
    X86_THREAD.fetch_stall_until = MAX(X86_THREAD.fetch_stall_until,
        x86_cpu->cycle + x86_cpu_recover_penalty - 1);
    X86_THREAD.fetch_neip = X86_THREAD.ctx->regs->eip;
}
static void x86_cpu_decode_thread(int core, int thread)
{
    struct list_t *fetchq = X86_THREAD.fetch_queue;
    struct list_t *uopq = X86_THREAD.uop_queue;
    struct x86_uop_t *uop;
    int i;

    for (i = 0; i < x86_cpu_decode_width; i++)
    {
        /* Empty fetch queue, full uop_queue */
        if (!list_count(fetchq))
            break;
        if (list_count(uopq) >= x86_uop_queue_size)
            break;

        uop = list_get(fetchq, 0);
        assert(x86_uop_exists(uop));

        /* If instructions come from the trace cache, i.e., are located in
         * the trace cache queue, copy all of them into the uop queue in
         * one single decode slot. */
        if (uop->trace_cache)
        {
            do
            {
                x86_fetch_queue_remove(core, thread, 0);
                list_add(uopq, uop);
                uop->in_uop_queue = 1;
                uop = list_get(fetchq, 0);
            } while (uop && uop->trace_cache);
            break;
        }

        /* Decode one macro-instruction coming from a block in the
         * instruction cache. If the cache access finished, extract it
         * from the fetch queue. */
        assert(!uop->mop_index);
        if (!mod_in_flight_access(X86_THREAD.inst_mod, uop->fetch_access,
            uop->fetch_address))
        {
            do
            {
                /* Move from fetch queue to uop queue */
                x86_fetch_queue_remove(core, thread, 0);
                list_add(uopq, uop);
                uop->in_uop_queue = 1;

                /* Trace */
                x86_trace("x86.inst id=%lld core=%d stg=\"dec\"\n",
                    uop->id_in_core, uop->core);

                /* Next */
                uop = list_get(fetchq, 0);
            } while (uop && uop->mop_index);
        }
    }
}
void x86_cpu_unmap_context(int core, int thread)
{
    struct x86_ctx_t *ctx = X86_THREAD.ctx;

    assert(ctx);
    assert(x86_ctx_get_status(ctx, x86_ctx_alloc));
    assert(!x86_ctx_get_status(ctx, x86_ctx_spec_mode));
    assert(!X86_THREAD.rob_count);
    assert(ctx->dealloc_signal);
    assert(x86_cpu->ctx_dealloc_signals > 0);

    X86_THREAD.ctx = NULL;
    X86_THREAD.fetch_neip = 0;

    x86_ctx_clear_status(ctx, x86_ctx_alloc);
    ctx->dealloc_when = x86_cpu->cycle;
    ctx->dealloc_signal = 0;
    x86_cpu->ctx_dealloc_signals--;
    x86_ctx_debug("cycle %lld: ctx %d evicted from c%dt%d\n",
        x86_cpu->cycle, ctx->pid, core, thread);

    /* Trace */
    x86_trace("x86.unmap_ctx ctx=%d core=%d thread=%d\n",
        ctx->pid, core, thread);

    /* If context is finished, free it. */
    if (x86_ctx_get_status(ctx, x86_ctx_finished))
    {
        /* Trace */
        x86_trace("x86.end_ctx ctx=%d\n", ctx->pid);

        /* Free context */
        x86_ctx_free(ctx);
    }
}
void x86_cpu_uop_trace_list_empty(void)
{
    struct linked_list_t *uop_trace_list;
    struct x86_uop_t *uop;

    uop_trace_list = x86_cpu->uop_trace_list;
    while (uop_trace_list->count)
    {
        /* Remove from list */
        linked_list_head(uop_trace_list);
        uop = linked_list_get(uop_trace_list);
        linked_list_remove(uop_trace_list);
        assert(uop->in_uop_trace_list);

        /* Trace */
        x86_trace("x86.end_inst id=%lld core=%d\n",
            uop->id_in_core, uop->core);

        /* Free uop */
        uop->in_uop_trace_list = 0;
        x86_uop_free_if_not_queued(uop);
    }
}
void x86_cpu_map_context(int core, int thread, struct x86_ctx_t *ctx)
{
    assert(!X86_THREAD.ctx);
    assert(!x86_ctx_get_status(ctx, x86_ctx_alloc));
    assert(x86_emu->alloc_list_count < x86_cpu_num_cores * x86_cpu_num_threads);
    assert(!ctx->dealloc_signal);

    X86_THREAD.ctx = ctx;
    X86_THREAD.last_alloc_pid = ctx->pid;
    X86_THREAD.fetch_neip = ctx->regs->eip;

    x86_ctx_set_status(ctx, x86_ctx_alloc);
    ctx->alloc_core = core;
    ctx->alloc_thread = thread;
    ctx->alloc_when = x86_cpu->cycle;

    x86_ctx_debug("cycle %lld: ctx %d allocated to c%dt%d\n",
        x86_cpu->cycle, ctx->pid, core, thread);

    /* Trace */
    x86_trace("x86.map_ctx ctx=%d core=%d thread=%d ppid=%d\n",
        ctx->pid, core, thread, ctx->parent ? ctx->parent->pid : 0);
}
void X86CpuEmptyTraceList(X86Cpu *self)
{
    X86Thread *thread;
    X86Core *core;

    struct linked_list_t *uop_trace_list;
    struct x86_uop_t *uop;

    uop_trace_list = self->uop_trace_list;
    while (uop_trace_list->count)
    {
        /* Remove from list */
        linked_list_head(uop_trace_list);
        uop = linked_list_get(uop_trace_list);
        thread = uop->thread;
        core = thread->core;
        linked_list_remove(uop_trace_list);
        assert(uop->in_uop_trace_list);

        /* Trace */
        x86_trace("x86.end_inst id=%lld core=%d\n",
            uop->id_in_core, core->id);

        /* Free uop */
        uop->in_uop_trace_list = 0;
        x86_uop_free_if_not_queued(uop);
    }
}
static int X86ThreadIssueIQ(X86Thread *self, int quant)
{
    X86Cpu *cpu = self->cpu;
    X86Core *core = self->core;

    struct linked_list_t *iq = self->iq;
    struct x86_uop_t *uop;
    int lat;

    /* Find instruction to issue */
    linked_list_head(iq);
    while (!linked_list_is_end(iq) && quant)
    {
        /* Get element from IQ */
        uop = linked_list_get(iq);
        assert(x86_uop_exists(uop));
        assert(!(uop->flags & X86_UINST_MEM));
        if (!uop->ready && !X86ThreadIsUopReady(self, uop))
        {
            linked_list_next(iq);
            continue;
        }
        uop->ready = 1;  /* avoid next call to 'X86ThreadIsUopReady' */

        /* Run the instruction in its corresponding functional unit.
         * If the instruction does not require a functional unit,
         * 'X86CoreReserveFunctionalUnit' returns 1 cycle latency. If there
         * is no functional unit available, it returns 0. */
        lat = X86CoreReserveFunctionalUnit(core, uop);
        if (!lat)
        {
            linked_list_next(iq);
            continue;
        }

        /* Instruction was issued to the corresponding fu.
         * Remove it from IQ */
        X86ThreadRemoveFromIQ(self);

        /* Schedule inst in Event Queue */
        assert(!uop->in_event_queue);
        assert(lat > 0);
        uop->issued = 1;
        uop->issue_when = asTiming(cpu)->cycle;
        uop->when = asTiming(cpu)->cycle + lat;
        X86CoreInsertInEventQueue(core, uop);

        /* Statistics */
        core->num_issued_uinst_array[uop->uinst->opcode]++;
        core->iq_reads++;
        core->reg_file_int_reads += uop->ph_int_idep_count;
        core->reg_file_fp_reads += uop->ph_fp_idep_count;
        self->num_issued_uinst_array[uop->uinst->opcode]++;
        self->iq_reads++;
        self->reg_file_int_reads += uop->ph_int_idep_count;
        self->reg_file_fp_reads += uop->ph_fp_idep_count;
        cpu->num_issued_uinst_array[uop->uinst->opcode]++;
        if (uop->trace_cache)
            self->trace_cache->num_issued_uinst++;

        /* One more instruction issued, update quantum. */
        quant--;

        /* Trace */
        x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
            uop->id_in_core, core->id);
    }

    return quant;
}
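/* 'X86CoreReserveFunctionalUnit' above returns the operation latency on
 * success, 1 for uops that need no functional unit, and 0 when every
 * instance of the required unit is busy (a structural hazard). The sketch
 * below illustrates that contract only; the class names, the fixed-size
 * instance table, and the choice to hold an instance busy for the full
 * operation latency are illustrative assumptions, not the simulator's
 * actual functional-unit model. */
enum fu_sketch_class_t
{
    fu_sketch_class_none = 0,  /* Uop needs no functional unit */
    fu_sketch_class_int_add,
    fu_sketch_class_int_mult,
    fu_sketch_class_count
};

struct fu_sketch_t
{
    int num_instances[fu_sketch_class_count];  /* At most 8 per class here */
    int latency[fu_sketch_class_count];
    long long cycle_when_free[fu_sketch_class_count][8];
};

static int fu_sketch_reserve(struct fu_sketch_t *fu,
    enum fu_sketch_class_t fu_class, long long cycle)
{
    int i;

    /* Uops that need no functional unit issue with one-cycle latency */
    if (fu_class == fu_sketch_class_none)
        return 1;

    /* Take the first idle instance; returning 0 tells the caller to leave
     * the uop in the IQ and retry next cycle. */
    for (i = 0; i < fu->num_instances[fu_class]; i++)
    {
        if (fu->cycle_when_free[fu_class][i] <= cycle)
        {
            fu->cycle_when_free[fu_class][i] = cycle + fu->latency[fu_class];
            return fu->latency[fu_class];
        }
    }
    return 0;
}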
static int X86ThreadIssuePreQ(X86Thread *self, int quantum)
{
    X86Core *core = self->core;
    X86Cpu *cpu = self->cpu;

    struct linked_list_t *preq = self->preq;
    struct x86_uop_t *prefetch;

    /* Process preq */
    linked_list_head(preq);
    while (!linked_list_is_end(preq) && quantum)
    {
        /* Get element from prefetch queue. If it is not ready, go to
         * the next one */
        prefetch = linked_list_get(preq);
        if (!prefetch->ready && !X86ThreadIsUopReady(self, prefetch))
        {
            linked_list_next(preq);
            continue;
        }

        /* Make sure the address has not been prefetched recently. This is
         * just to avoid unnecessary memory traffic. Even though the cache
         * would register a hit on a redundant prefetch, it is still
         * helpful to avoid going to the memory system at all. */
        if (prefetch_history_is_redundant(core->prefetch_history,
            self->data_mod, prefetch->phy_addr))
        {
            /* Remove from queue. Do not prefetch. */
            assert(prefetch->uinst->opcode == x86_uinst_prefetch);
            X86ThreadRemovePreQ(self);
            prefetch->completed = 1;
            x86_uop_free_if_not_queued(prefetch);
            continue;
        }

        prefetch->ready = 1;

        /* Check that memory system is accessible */
        if (!mod_can_access(self->data_mod, prefetch->phy_addr))
        {
            linked_list_next(preq);
            continue;
        }

        /* Remove from prefetch queue */
        assert(prefetch->uinst->opcode == x86_uinst_prefetch);
        X86ThreadRemovePreQ(self);

        /* Access memory system */
        mod_access(self->data_mod, mod_access_prefetch, prefetch->phy_addr,
            NULL, core->event_queue, prefetch, NULL);

        /* Record prefetched address */
        prefetch_history_record(core->prefetch_history, prefetch->phy_addr);

        /* The cache system will place the prefetch at the head of the
         * event queue when it is ready. For now, mark "in_event_queue" to
         * prevent the uop from being freed. */
        prefetch->in_event_queue = 1;
        prefetch->issued = 1;
        prefetch->issue_when = asTiming(cpu)->cycle;

        /* Statistics */
        core->num_issued_uinst_array[prefetch->uinst->opcode]++;
        core->lsq_reads++;
        core->reg_file_int_reads += prefetch->ph_int_idep_count;
        core->reg_file_fp_reads += prefetch->ph_fp_idep_count;
        self->num_issued_uinst_array[prefetch->uinst->opcode]++;
        self->lsq_reads++;
        self->reg_file_int_reads += prefetch->ph_int_idep_count;
        self->reg_file_fp_reads += prefetch->ph_fp_idep_count;
        cpu->num_issued_uinst_array[prefetch->uinst->opcode]++;
        if (prefetch->trace_cache)
            self->trace_cache->num_issued_uinst++;

        /* One more instruction issued, update quantum. */
        quantum--;

        /* MMU statistics */
        MMUAccessPage(cpu->mmu, prefetch->phy_addr, mmu_access_read);

        /* Trace */
        x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
            prefetch->id_in_core, core->id);
    }

    return quantum;
}
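/* The redundancy filter above relies on 'prefetch_history_is_redundant' and
 * 'prefetch_history_record'. The sketch below shows one plausible shape for
 * that history: a small circular buffer of recently prefetched addresses,
 * assumed to be block-aligned by the caller. The structure, its size, and
 * the field names are illustrative assumptions; the real implementation
 * lives elsewhere in the source tree. */
struct prefetch_history_sketch_t
{
    unsigned int addr[16];  /* Recently prefetched (block-aligned) addresses */
    int head;               /* Next slot to overwrite */
};

static int prefetch_history_sketch_is_redundant(
    struct prefetch_history_sketch_t *ph, unsigned int phy_addr)
{
    int i;

    /* A prefetch is redundant if its block was prefetched recently */
    for (i = 0; i < 16; i++)
        if (ph->addr[i] && ph->addr[i] == phy_addr)
            return 1;
    return 0;
}

static void prefetch_history_sketch_record(
    struct prefetch_history_sketch_t *ph, unsigned int phy_addr)
{
    /* Overwrite the oldest entry */
    ph->addr[ph->head] = phy_addr;
    ph->head = (ph->head + 1) % 16;
}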
static int X86ThreadIssueLQ(X86Thread *self, int quant)
{
    X86Core *core = self->core;
    X86Cpu *cpu = self->cpu;

    struct linked_list_t *lq = self->lq;
    struct x86_uop_t *load;
    struct mod_client_info_t *client_info;

    /* Process lq */
    linked_list_head(lq);
    while (!linked_list_is_end(lq) && quant)
    {
        /* Get element from load queue. If it is not ready, go to
         * the next one */
        load = linked_list_get(lq);
        if (!load->ready && !X86ThreadIsUopReady(self, load))
        {
            linked_list_next(lq);
            continue;
        }
        load->ready = 1;

        /* Check that memory system is accessible */
        if (!mod_can_access(self->data_mod, load->phy_addr))
        {
            linked_list_next(lq);
            continue;
        }

        /* Remove from load queue */
        assert(load->uinst->opcode == x86_uinst_load);
        X86ThreadRemoveFromLQ(self);

        /* Create and fill the mod_client_info_t object */
        client_info = mod_client_info_create(self->data_mod);
        client_info->prefetcher_eip = load->eip;

        /* Access memory system */
        mod_access(self->data_mod, mod_access_load, load->phy_addr, NULL,
            core->event_queue, load, client_info);

        /* The cache system will place the load at the head of the
         * event queue when it is ready. For now, mark "in_event_queue" to
         * prevent the uop from being freed. */
        load->in_event_queue = 1;
        load->issued = 1;
        load->issue_when = asTiming(cpu)->cycle;

        /* Statistics */
        core->num_issued_uinst_array[load->uinst->opcode]++;
        core->lsq_reads++;
        core->reg_file_int_reads += load->ph_int_idep_count;
        core->reg_file_fp_reads += load->ph_fp_idep_count;
        self->num_issued_uinst_array[load->uinst->opcode]++;
        self->lsq_reads++;
        self->reg_file_int_reads += load->ph_int_idep_count;
        self->reg_file_fp_reads += load->ph_fp_idep_count;
        cpu->num_issued_uinst_array[load->uinst->opcode]++;
        if (load->trace_cache)
            self->trace_cache->num_issued_uinst++;

        /* One more instruction issued, update quantum. */
        quant--;

        /* MMU statistics */
        MMUAccessPage(cpu->mmu, load->phy_addr, mmu_access_read);

        /* Trace */
        x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
            load->id_in_core, core->id);
    }

    return quant;
}
/*
 * ACC call #2 - accArith
 *
 * accArith - Arithmetic calculation for the Whetstone benchmark
 *
 * @return
 *	The function always returns 0 if running properly;
 *	returns -1 if an illegal input value is detected.
 */
static int x86_acc_func_accArith(struct x86_ctx_t *ctx)
{
    int core = 0;
    int thread = 0;

    struct x86_regs_t *regs = ctx->regs;
    struct mem_t *mem = ctx->mem;

    unsigned int args_ptr;
    double func_args[4];

    /* Read arguments */
    args_ptr = regs->ecx;
    printf("args_ptr = %u(0x%x)\n\n", args_ptr, args_ptr);

    func_args[0] = 1.0;
    func_args[1] = -1.0;
    func_args[2] = -1.0;
    func_args[3] = -1.0;

    /* Get function info */
    //mem_read(mem, args_ptr, sizeof(double), func_args);
    /*
    mem_read(mem, args_ptr + 4, 8, func_args[1]);
    mem_read(mem, args_ptr + 8, 8, func_args[2]);
    mem_read(mem, args_ptr + 12, 8, func_args[3]);
    mem_read(mem, args_ptr + 16, 8, func_args[4]);
    */

    printf("*******************************\n");
    printf("In Emulation\n");
    printf("Cycle when getting into this call is %lld\n\n", x86_cpu->cycle);

    /***********************************************/
    struct linked_list_t *sq = X86_THREAD.sq;
    struct linked_list_t *lq = X86_THREAD.lq;
    struct x86_uop_t *store;
    struct x86_uop_t *load;
    int quant = x86_cpu_issue_width;

    /* Drive the stores in the store queue through the data module */
    linked_list_head(sq);
    while (!linked_list_is_end(sq) && quant)
    {
        store = linked_list_get(sq);
        printf("physical addr @ store: %u\n", store->phy_addr);
        if (!store->ready && !x86_reg_file_ready(store))
        {
            linked_list_next(sq);
            continue;
        }
        store->ready = 1;

        if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
        {
            linked_list_next(sq);
            continue;
        }

        /* Replay the same store access 9000 times to generate traffic */
        int i = 9000;
        while (i--)
            mod_access(X86_THREAD.data_mod, mod_access_store,
                store->phy_addr, NULL, X86_CORE.event_queue, store);
        quant--;

        /* MMU statistics */
        if (*mmu_report_file_name)
            mmu_access_page(store->phy_addr, mmu_access_write);
    }

    /* Drive the loads in the load queue through the data module */
    quant = x86_cpu_issue_width;
    linked_list_head(lq);
    while (!linked_list_is_end(lq) && quant)
    {
        load = linked_list_get(lq);
        printf("physical addr @ load: %u\n", load->phy_addr);
        if (!load->ready && !x86_reg_file_ready(load))
        {
            printf("load debug point 1\n");
            linked_list_next(lq);
            continue;
        }
        load->ready = 1;

        if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
        {
            printf("load debug point 2\n");
            linked_list_next(lq);
            continue;
        }

        /* Replay the same load access 9000 times to generate traffic */
        int j = 9000;
        while (j--)
            mod_access(X86_THREAD.data_mod, mod_access_load,
                load->phy_addr, NULL, X86_CORE.event_queue, load);
        quant--;

        /* MMU statistics */
        if (*mmu_report_file_name)
            mmu_access_page(load->phy_addr, mmu_access_read);

        /* Trace */
        x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
            load->id_in_core, load->core);
    }
    /***********************************************/

    double T = 0.499975;
    printf("func_args1 = %f, func_args2 = %f, func_args3 = %f, func_args4 = %f\n",
        func_args[0], func_args[1], func_args[2], func_args[3]);

    /* Whetstone arithmetic kernel, currently disabled:
    func_args[0] = (func_args[0] + func_args[1] + func_args[2] - func_args[3]) * T;
    func_args[1] = (func_args[0] + func_args[1] - func_args[2] + func_args[3]) * T;
    func_args[2] = (func_args[0] - func_args[1] + func_args[2] - func_args[3]) * T;
    func_args[3] = (-func_args[0] + func_args[1] + func_args[2] + func_args[3]) * T;
    */

    return 0;
}
static int x86_acc_func_accDTW(struct x86_ctx_t *ctx)
{
    struct x86_regs_t *regs = ctx->regs;
    struct mem_t *mem = ctx->mem;

    int core = 0;
    int thread = 0;

    unsigned int args_ptr;
    struct arglist func_args;

    /* Read arguments */
    args_ptr = regs->ecx;

    /* Get function info */
    mem_read(mem, args_ptr, sizeof(struct arglist), &func_args);
    printf("\t\t**sample1 = %p (%p)\n", func_args.sample1, &(func_args.sample1[0][0]));
    printf("\t\tlength1 = %u (%p)\n", func_args.length1, &func_args.length1);
    printf("\t\t**sample2 = %p (%p)\n", func_args.sample2, &(func_args.sample2[0][0]));
    printf("\t\tlength2 = %u (%p)\n", func_args.length2, &func_args.length2);
    printf("\t\ti = %u (%p)\n", func_args.i, &func_args.i);
    printf("\t\tj = %u (%p)\n", func_args.j, &func_args.j);
    printf("\t\t*table = %p (%p)\n", func_args.table, &(func_args.table[0]));

    /***********************************************/
#define L2ONLY
#define WITHACC

#ifdef L2ONLY
    /* Temporarily route data accesses directly to the L2 module */
    printf("Cache Behavior Simulation\n");
    char *mod_name = "mod-l2-0";
    X86_THREAD.data_mod = mem_system_get_mod(mod_name);
#endif

#ifdef WITHACC
    struct linked_list_t *sq = X86_THREAD.sq;
    struct linked_list_t *lq = X86_THREAD.lq;
    struct x86_uop_t *store;
    struct x86_uop_t *load;
    int quant = x86_cpu_issue_width;
    unsigned int count1, count2;

    for (count1 = 0; count1 < 124; count1++)
    {
        for (count2 = 0; count2 < 124; count2++)
        {
            /* Drive the stores in the store queue through the data module */
            linked_list_head(sq);
            while (!linked_list_is_end(sq) && quant)
            {
                store = linked_list_get(sq);
                printf("\n\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n\n");
                printf("physical addr @ store: %u\n", store->phy_addr);
                assert(store->uinst->opcode == x86_uinst_store);
                if (!store->ready && !x86_reg_file_ready(store))
                {
                    linked_list_next(sq);
                    continue;
                }
                store->ready = 1;

                printf("data module kind: %d\n", X86_THREAD.data_mod->kind);
                printf("data module level: %d\n", X86_THREAD.data_mod->level);
                printf("data module name: %s\n", X86_THREAD.data_mod->name);
                printf("data module cache name: %s\n", X86_THREAD.data_mod->cache->name);

                if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
                {
                    linked_list_next(sq);
                    continue;
                }

                /* Replay the store access three times */
                int i = 3;
                while (i--)
                {
                    printf("Store Debug Point 6\n");
                    mod_access(X86_THREAD.data_mod, mod_access_store,
                        store->phy_addr, NULL, X86_CORE.event_queue, store);
                }
                quant--;

                /* MMU statistics */
                if (*mmu_report_file_name)
                    mmu_access_page(store->phy_addr, mmu_access_write);
            }

            /* Drive the loads in the load queue through the data module */
            quant = x86_cpu_issue_width;
            //printf("Load Simulation ...\n");
            linked_list_head(lq);
            while (!linked_list_is_end(lq) && quant)
            {
                load = linked_list_get(lq);
                printf("physical addr @ load: %u\n", load->phy_addr);
                assert(load->uinst->opcode == x86_uinst_load);
                if (!load->ready && !x86_reg_file_ready(load))
                {
                    printf("load debug point 1\n");
                    linked_list_next(lq);
                    continue;
                }
                load->ready = 1;

                if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
                {
                    printf("load debug point 2\n");
                    linked_list_next(lq);
                    continue;
                }

                /* Replay the load access once */
                int j = 1;
                while (j--)
                {
                    printf("load debug point 3\n");
                    mod_access(X86_THREAD.data_mod, mod_access_load,
                        load->phy_addr, NULL, X86_CORE.event_queue, load);
                }
                quant--;

                /* MMU statistics */
                if (*mmu_report_file_name)
                    mmu_access_page(load->phy_addr, mmu_access_read);

                /* Trace */
                x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
                    load->id_in_core, load->core);
            }
        }
    }
#endif

#ifdef L2ONLY
    /* Restore the L1 data module */
    mod_name = "mod-dl1-0";
    X86_THREAD.data_mod = mem_system_get_mod(mod_name);
#endif
    /***********************************************/

    int ret = DTWdistance(func_args.sample1, func_args.length1,
        func_args.sample2, func_args.length2,
        func_args.i, func_args.j, func_args.table);

    return ret;
}
/* Run the emulation of one x86 macro-instruction and create its uops.
 * If any of the uops is a control uop, this uop will be the return value of
 * the function. Otherwise, the first decoded uop is returned. */
static struct x86_uop_t *X86ThreadFetchInst(X86Thread *self, int fetch_trace_cache)
{
    X86Cpu *cpu = self->cpu;
    X86Core *core = self->core;
    X86Context *ctx = self->ctx;

    struct x86_uop_t *uop;
    struct x86_uop_t *ret_uop;
    struct x86_uinst_t *uinst;
    int uinst_count;
    int uinst_index;

    /* Functional simulation */
    self->fetch_eip = self->fetch_neip;
    X86ContextSetEip(ctx, self->fetch_eip);
    X86ContextExecute(ctx);
    self->fetch_neip = self->fetch_eip + ctx->inst.size;

    /* If no micro-instruction was generated by this instruction, create a
     * 'nop' micro-instruction. This makes sure that there is always a micro-
     * instruction representing the regular control flow of macro-instructions
     * of the program. It is important for the traces stored in the trace
     * cache. */
    if (!x86_uinst_list->count)
        x86_uinst_new(ctx, x86_uinst_nop, 0, 0, 0, 0, 0, 0, 0);

    /* Micro-instructions created by the x86 instructions can be found now
     * in 'x86_uinst_list'. */
    uinst_count = list_count(x86_uinst_list);
    uinst_index = 0;
    ret_uop = NULL;
    while (list_count(x86_uinst_list))
    {
        /* Get uinst from head of list */
        uinst = list_remove_at(x86_uinst_list, 0);

        /* Create uop */
        uop = x86_uop_create();
        uop->uinst = uinst;
        assert(uinst->opcode >= 0 && uinst->opcode < x86_uinst_opcode_count);
        uop->flags = x86_uinst_info[uinst->opcode].flags;
        uop->id = cpu->uop_id_counter++;
        uop->id_in_core = core->uop_id_counter++;

        uop->ctx = ctx;
        uop->thread = self;

        uop->mop_count = uinst_count;
        uop->mop_size = ctx->inst.size;
        uop->mop_id = uop->id - uinst_index;
        uop->mop_index = uinst_index;

        uop->eip = self->fetch_eip;
        uop->in_fetch_queue = 1;
        uop->trace_cache = fetch_trace_cache;
        uop->specmode = X86ContextGetState(ctx, X86ContextSpecMode);
        uop->fetch_address = self->fetch_address;
        uop->fetch_access = self->fetch_access;
        uop->neip = ctx->regs->eip;
        uop->pred_neip = self->fetch_neip;
        uop->target_neip = ctx->target_eip;

        /* Process uop dependences and classify them in integer,
         * floating-point, flags, etc. */
        x86_uop_count_deps(uop);

        /* Calculate physical address of a memory access */
        if (uop->flags & X86_UINST_MEM)
        {
            if (uinst->address == ctx->mem_mod_low && ctx->mem_mod_low != 0)
            {
                ctx->mem_low = uop->data - (uop->data & (self->data_mod->block_size - 1));
                uop->addr = uinst->address;
            }
            else if (uinst->address == ctx->mem_mod_high && ctx->mem_mod_high != 0)
            {
                ctx->mem_high = uop->data | (self->data_mod->block_size - 1);
                uop->addr = uinst->address;
            }
            else if (!FPGARegCheck(ctx, uop, uinst->address))
            {
                if (self->standalone)
                {
                    uop->phy_addr = uinst->address;
                    uop->addr = uinst->address;
                    mem_read_copy(ctx->mem, uop->addr, 4, &(uop->data));
                }
                else
                {
                    uop->phy_addr = mmu_translate(self->ctx->address_space_index,
                        uinst->address);
                    uop->addr = uinst->address;
                    mem_read_copy(ctx->mem, uop->addr, 4, &(uop->data));
                }
            }
        }

        /* Trace */
        if (x86_tracing())
        {
            char str[MAX_STRING_SIZE];
            char inst_name[MAX_STRING_SIZE];
            char uinst_name[MAX_STRING_SIZE];

            char *str_ptr;
            int str_size;

            str_ptr = str;
            str_size = sizeof str;

            /* Command */
            str_printf(&str_ptr, &str_size, "x86.new_inst id=%lld core=%d",
                uop->id_in_core, core->id);

            /* Speculative mode */
            if (uop->specmode)
                str_printf(&str_ptr, &str_size, " spec=\"t\"");

            /* Macro-instruction name */
            if (!uinst_index)
            {
                x86_inst_dump_buf(&ctx->inst, inst_name, sizeof inst_name);
                str_printf(&str_ptr, &str_size, " asm=\"%s\"", inst_name);
            }

            /* Rest */
            x86_uinst_dump_buf(uinst, uinst_name, sizeof uinst_name);
            str_printf(&str_ptr, &str_size, " uasm=\"%s\" stg=\"fe\"", uinst_name);

            /* Dump */
            x86_trace("%s\n", str);
        }

        /* Select as returned uop */
        if (!ret_uop || (uop->flags & X86_UINST_CTRL))
            ret_uop = uop;

        /* Insert into fetch queue */
        list_add(self->fetch_queue, uop);
        if (fetch_trace_cache)
            self->trace_cache_queue_occ++;

        /* Statistics */
        cpu->num_fetched_uinst++;
        self->num_fetched_uinst++;
        if (fetch_trace_cache)
            self->trace_cache->num_fetched_uinst++;

        /* Next uinst */
        uinst_index++;
    }

    /* Increase fetch queue occupancy if instruction does not come from
     * trace cache, and return. */
    if (ret_uop && !fetch_trace_cache)
        self->fetchq_occ += ret_uop->mop_size;
    return ret_uop;
}
void X86ThreadRecover(X86Thread *self)
{
    X86Cpu *cpu = self->cpu;
    X86Core *core = self->core;
    struct x86_uop_t *uop;

    /* Remove instructions of this thread in fetch queue, uop queue,
     * instruction queue, store queue, load queue, and event queue. */
    X86ThreadRecoverFetchQueue(self);
    X86ThreadRecoverUopQueue(self);
    X86ThreadRecoverIQ(self);
    X86ThreadRecoverLSQ(self);
    X86ThreadRecoverEventQueue(self);

    /* Remove instructions from ROB, restoring the state of the
     * physical register file. */
    for (;;)
    {
        /* Get instruction */
        uop = X86ThreadGetROBTail(self);
        if (!uop)
            break;

        /* If we already removed all speculative instructions,
         * the work is finished */
        assert(uop->thread == self);
        if (!uop->specmode)
            break;

        /* Statistics */
        if (uop->trace_cache)
            self->trace_cache->num_squashed_uinst++;
        self->num_squashed_uinst++;
        core->num_squashed_uinst++;
        cpu->num_squashed_uinst++;

        /* Undo map */
        if (!uop->completed)
            X86ThreadWriteUop(self, uop);
        X86ThreadUndoUop(self, uop);

        /* Trace */
        if (x86_tracing())
        {
            x86_trace("x86.inst id=%lld core=%d stg=\"sq\"\n",
                uop->id_in_core, core->id);
            x86_cpu_uop_trace_list_add(uop);
        }

        /* Remove entry in ROB */
        X86ThreadRemoveROBTail(self);
    }

    /* Check state of fetch stage and mapped context, if still any */
    if (self->ctx)
    {
        /* If we actually fetched wrong instructions, recover emulator */
        if (X86ContextGetState(self->ctx, X86ContextSpecMode))
            X86ContextRecover(self->ctx);

        /* Stall fetch and set eip to fetch. */
        self->fetch_stall_until = MAX(self->fetch_stall_until,
            asTiming(x86_cpu)->cycle + x86_cpu_recover_penalty - 1);
        self->fetch_neip = self->ctx->regs->eip;
    }
}
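/* A note on the "undo map" step above: squashing walks the ROB from the
 * tail, so renames are undone in strict reverse program order. The sketch
 * below shows the core idea for a single logical register, assuming each
 * uop saved its previous physical mapping at rename time. The structure
 * and field names ('rat', 'free_list') are illustrative, not the
 * simulator's actual register-file layout. */
struct rename_undo_sketch_t
{
    int rat[32];        /* Logical -> physical register mapping */
    int free_list[64];  /* Stack of free physical registers */
    int free_count;
};

static void rename_undo_sketch(struct rename_undo_sketch_t *rf,
    int ldep, int new_phy, int old_phy)
{
    /* Reverse of rename: return the newly allocated physical register to
     * the free list and restore the previous mapping. Tail-first ROB
     * traversal guarantees 'rat[ldep]' still holds 'new_phy' here. */
    assert(rf->rat[ldep] == new_phy);
    rf->rat[ldep] = old_phy;
    rf->free_list[rf->free_count++] = new_phy;
}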
static int x86_cpu_dispatch_thread(int core, int thread, int quant)
{
    struct x86_uop_t *uop;
    enum x86_dispatch_stall_t stall;

    while (quant)
    {
        /* Check if we can dispatch */
        stall = x86_cpu_can_dispatch_thread(core, thread);
        if (stall != x86_dispatch_stall_used)
        {
            X86_CORE.dispatch_stall[stall] += quant;
            break;
        }

        /* Get entry from uop queue */
        uop = list_remove_at(X86_THREAD.uop_queue, 0);
        assert(x86_uop_exists(uop));
        uop->in_uop_queue = 0;

        /* Rename */
        x86_reg_file_rename(uop);

        /* Insert in ROB */
        x86_rob_enqueue(uop);
        X86_CORE.rob_writes++;
        X86_THREAD.rob_writes++;

        /* Non-memory instructions into the IQ */
        if (!(uop->flags & X86_UINST_MEM))
        {
            x86_iq_insert(uop);
            X86_CORE.iq_writes++;
            X86_THREAD.iq_writes++;
        }

        /* Memory instructions into the LSQ */
        if (uop->flags & X86_UINST_MEM)
        {
            x86_lsq_insert(uop);
            X86_CORE.lsq_writes++;
            X86_THREAD.lsq_writes++;
        }

        /* Statistics */
        X86_CORE.dispatch_stall[uop->specmode ? x86_dispatch_stall_spec
            : x86_dispatch_stall_used]++;
        X86_THREAD.num_dispatched_uinst_array[uop->uinst->opcode]++;
        X86_CORE.num_dispatched_uinst_array[uop->uinst->opcode]++;
        x86_cpu->num_dispatched_uinst_array[uop->uinst->opcode]++;
        if (uop->trace_cache)
            X86_THREAD.trace_cache->num_dispatched_uinst++;

        /* Another instruction dispatched, update quantum. */
        quant--;

        /* Trace */
        x86_trace("x86.inst id=%lld core=%d stg=\"di\"\n",
            uop->id_in_core, uop->core);
    }

    return quant;
}
static int x86_cpu_issue_lq(int core, int thread, int quant)
{
    struct linked_list_t *lq = X86_THREAD.lq;
    struct x86_uop_t *load;

    /* Process lq */
    linked_list_head(lq);
    while (!linked_list_is_end(lq) && quant)
    {
        /* Get element from load queue. If it is not ready, go to
         * the next one */
        load = linked_list_get(lq);
        if (!load->ready && !x86_reg_file_ready(load))
        {
            linked_list_next(lq);
            continue;
        }
        load->ready = 1;

        /* Check that memory system is accessible */
        if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
        {
            linked_list_next(lq);
            continue;
        }

        /* Remove from load queue */
        assert(load->uinst->opcode == x86_uinst_load);
        x86_lq_remove(core, thread);

        /* Access memory system */
        mod_access(X86_THREAD.data_mod, mod_access_load, load->phy_addr,
            NULL, X86_CORE.event_queue, load);

        /* The cache system will place the load at the head of the
         * event queue when it is ready. For now, mark "in_event_queue" to
         * prevent the uop from being freed. */
        load->in_event_queue = 1;
        load->issued = 1;
        load->issue_when = x86_cpu->cycle;

        /* Instruction issued */
        X86_CORE.issued[load->uinst->opcode]++;
        X86_CORE.lsq_reads++;
        X86_CORE.reg_file_int_reads += load->ph_int_idep_count;
        X86_CORE.reg_file_fp_reads += load->ph_fp_idep_count;
        X86_THREAD.issued[load->uinst->opcode]++;
        X86_THREAD.lsq_reads++;
        X86_THREAD.reg_file_int_reads += load->ph_int_idep_count;
        X86_THREAD.reg_file_fp_reads += load->ph_fp_idep_count;
        x86_cpu->issued[load->uinst->opcode]++;
        quant--;

        /* MMU statistics */
        if (*mmu_report_file_name)
            mmu_access_page(load->phy_addr, mmu_access_read);

        /* Trace */
        x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
            load->id_in_core, load->core);
    }

    return quant;
}
static int x86_cpu_issue_iq(int core, int thread, int quant)
{
    struct linked_list_t *iq = X86_THREAD.iq;
    struct x86_uop_t *uop;
    int lat;

    /* Find instruction to issue */
    linked_list_head(iq);
    while (!linked_list_is_end(iq) && quant)
    {
        /* Get element from IQ */
        uop = linked_list_get(iq);
        assert(x86_uop_exists(uop));
        assert(!(uop->flags & X86_UINST_MEM));
        if (!uop->ready && !x86_reg_file_ready(uop))
        {
            linked_list_next(iq);
            continue;
        }
        uop->ready = 1;  /* avoid next call to 'x86_reg_file_ready' */

        /* Run the instruction in its corresponding functional unit.
         * If the instruction does not require a functional unit,
         * 'x86_fu_reserve' returns 1 cycle latency. If there is no
         * functional unit available, 'x86_fu_reserve' returns 0. */
        lat = x86_fu_reserve(uop);
        if (!lat)
        {
            linked_list_next(iq);
            continue;
        }

        /* Instruction was issued to the corresponding fu.
         * Remove it from IQ */
        x86_iq_remove(core, thread);

        /* Schedule inst in Event Queue */
        assert(!uop->in_event_queue);
        assert(lat > 0);
        uop->issued = 1;
        uop->issue_when = x86_cpu->cycle;
        uop->when = x86_cpu->cycle + lat;
        x86_event_queue_insert(X86_CORE.event_queue, uop);

        /* Instruction issued */
        X86_CORE.issued[uop->uinst->opcode]++;
        X86_CORE.iq_reads++;
        X86_CORE.reg_file_int_reads += uop->ph_int_idep_count;
        X86_CORE.reg_file_fp_reads += uop->ph_fp_idep_count;
        X86_THREAD.issued[uop->uinst->opcode]++;
        X86_THREAD.iq_reads++;
        X86_THREAD.reg_file_int_reads += uop->ph_int_idep_count;
        X86_THREAD.reg_file_fp_reads += uop->ph_fp_idep_count;
        x86_cpu->issued[uop->uinst->opcode]++;
        quant--;

        /* Trace */
        x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
            uop->id_in_core, uop->core);
    }

    return quant;
}