/* Remove all speculative uops from a given load/store queue in the
 * given thread. */
void X86ThreadRecoverLSQ(X86Thread *self)
{
	struct linked_list_t *lq = self->lq;
	struct linked_list_t *sq = self->sq;
	struct x86_uop_t *uop;

	/* Recover load queue */
	linked_list_head(lq);
	while (!linked_list_is_end(lq))
	{
		uop = linked_list_get(lq);
		if (uop->specmode)
		{
			X86ThreadRemoveFromLQ(self);
			x86_uop_free_if_not_queued(uop);
			continue;
		}
		linked_list_next(lq);
	}

	/* Recover store queue */
	linked_list_head(sq);
	while (!linked_list_is_end(sq))
	{
		uop = linked_list_get(sq);
		if (uop->specmode)
		{
			X86ThreadRemoveFromSQ(self);
			x86_uop_free_if_not_queued(uop);
			continue;
		}
		linked_list_next(sq);
	}
}
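/*
 * The recover loops above (and the queue loops throughout this file) share one
 * traversal idiom: after removing the current element, the list's internal
 * iterator already points at the successor, so the loop must NOT advance; it
 * only advances when the element is kept. Below is a minimal, self-contained
 * sketch of that idiom over a toy singly-linked list (the toy_* names are
 * illustrative, not Multi2Sim's linked_list_t API).
 */
#include <stdlib.h>

struct toy_node
{
	int spec;  /* plays the role of 'uop->specmode' */
	struct toy_node *next;
};

/* Remove every speculative node from the list rooted at '*head'. */
static void toy_recover(struct toy_node **head)
{
	struct toy_node **cur = head;
	while (*cur)
	{
		if ((*cur)->spec)
		{
			struct toy_node *victim = *cur;
			*cur = victim->next;  /* unlink: do not advance */
			free(victim);
			continue;
		}
		cur = &(*cur)->next;  /* keep: advance */
	}
}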
static int X86ThreadIssueSQ(X86Thread *self, int quantum)
{
	X86Cpu *cpu = self->cpu;
	X86Core *core = self->core;

	struct x86_uop_t *store;
	struct linked_list_t *sq = self->sq;
	struct mod_client_info_t *client_info;

	/* Process SQ */
	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quantum)
	{
		/* Get store */
		store = linked_list_get(sq);
		assert(store->uinst->opcode == x86_uinst_store);

		/* Only committed stores issue */
		if (store->in_rob)
			break;

		/* Check that memory system entry is ready */
		if (!mod_can_access(self->data_mod, store->phy_addr))
			break;

		/* Remove store from store queue */
		X86ThreadRemoveFromSQ(self);

		/* Create and fill the mod_client_info_t object */
		client_info = mod_client_info_create(self->data_mod);
		client_info->prefetcher_eip = store->eip;

		/* Issue store */
		mod_access(self->data_mod, mod_access_store,
			store->phy_addr, NULL, core->event_queue, store,
			client_info);

		/* The cache system will place the store at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		store->in_event_queue = 1;
		store->issued = 1;
		store->issue_when = asTiming(cpu)->cycle;

		/* Statistics */
		core->num_issued_uinst_array[store->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += store->ph_int_idep_count;
		core->reg_file_fp_reads += store->ph_fp_idep_count;
		self->num_issued_uinst_array[store->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += store->ph_int_idep_count;
		self->reg_file_fp_reads += store->ph_fp_idep_count;
		cpu->num_issued_uinst_array[store->uinst->opcode]++;
		if (store->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction, update quantum. */
		quantum--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);
	}

	return quantum;
}
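/*
 * Hedged sketch of the quantum contract used by X86ThreadIssueSQ and its
 * siblings below (X86ThreadIssueLQ, X86ThreadIssueIQ): each helper consumes
 * one quantum unit per issued uop and returns the remainder, so a caller can
 * chain them within one cycle's issue width. This is only an illustration of
 * that contract under the assumption that the usual prototypes are visible;
 * it is not Multi2Sim's actual issue-stage driver code.
 */
static void issue_stage_sketch(X86Thread *thread, int issue_width)
{
	int quantum = issue_width;

	/* Memory uops first, then the issue queue; each helper returns how
	 * much of the quantum is left for the next one. */
	quantum = X86ThreadIssueSQ(thread, quantum);
	quantum = X86ThreadIssueLQ(thread, quantum);
	quantum = X86ThreadIssueIQ(thread, quantum);
	(void) quantum;  /* leftover slots are simply unused this cycle */
}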
static int issue_sq(int core, int thread, int quant)
{
	struct uop_t *store;
	struct linked_list_t *sq = THREAD.sq;

	/* Process SQ */
	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quant)
	{
		/* Get store */
		store = linked_list_get(sq);
		assert(store->uinst->opcode == x86_uinst_store);

		/* Only committed stores issue */
		if (store->in_rob)
			break;

		/* Check that memory system entry is ready */
		if (!mod_can_access(THREAD.data_mod, store->phy_addr))
			break;

		/* Remove store from store queue */
		sq_remove(core, thread);

		/* Issue store */
		mod_access(THREAD.data_mod, mod_entry_cpu, mod_access_write,
			store->phy_addr, NULL, CORE.eventq, store);

		/* The cache system will place the store at the head of the
		 * event queue when it is ready. For now, mark "in_eventq" to
		 * prevent the uop from being freed. */
		store->in_eventq = 1;
		store->issued = 1;
		store->issue_when = cpu->cycle;

		/* Instruction issued */
		CORE.issued[store->uinst->opcode]++;
		CORE.lsq_reads++;
		CORE.rf_int_reads += store->ph_int_idep_count;
		CORE.rf_fp_reads += store->ph_fp_idep_count;
		THREAD.issued[store->uinst->opcode]++;
		THREAD.lsq_reads++;
		THREAD.rf_int_reads += store->ph_int_idep_count;
		THREAD.rf_fp_reads += store->ph_fp_idep_count;
		cpu->issued[store->uinst->opcode]++;
		quant--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);

		/* Debug */
		esim_debug("uop action=\"update\", core=%d, seq=%llu,"
			" stg_issue=1, in_lsq=0, issued=1\n",
			store->core, (long long unsigned) store->di_seq);
	}

	return quant;
}
static int x86_cpu_issue_sq(int core, int thread, int quant)
{
	struct x86_uop_t *store;
	struct linked_list_t *sq = X86_THREAD.sq;

	/* Process SQ */
	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quant)
	{
		/* Get store */
		store = linked_list_get(sq);
		assert(store->uinst->opcode == x86_uinst_store);

		/* Only committed stores issue */
		if (store->in_rob)
			break;

		/* Check that memory system entry is ready */
		if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
			break;

		/* Remove store from store queue */
		x86_sq_remove(core, thread);

		/* Issue store */
		mod_access(X86_THREAD.data_mod, mod_access_store,
			store->phy_addr, NULL, X86_CORE.event_queue, store);

		/* The cache system will place the store at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		store->in_event_queue = 1;
		store->issued = 1;
		store->issue_when = x86_cpu->cycle;

		/* Instruction issued */
		X86_CORE.issued[store->uinst->opcode]++;
		X86_CORE.lsq_reads++;
		X86_CORE.reg_file_int_reads += store->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += store->ph_fp_idep_count;
		X86_THREAD.issued[store->uinst->opcode]++;
		X86_THREAD.lsq_reads++;
		X86_THREAD.reg_file_int_reads += store->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += store->ph_fp_idep_count;
		x86_cpu->issued[store->uinst->opcode]++;
		quant--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);
	}

	return quant;
}
/* Update 'uop->ready' field of all instructions in a list as per the result * obtained by 'rf_ready'. The 'uop->ready' field is redundant and should always * match the return value of 'rf_ready' while an uop is in the ROB. * A debug message is dumped when the uop transitions to ready. */ void uop_lnlist_check_if_ready(struct linked_list_t *uop_list) { struct uop_t *uop; linked_list_head(uop_list); for (linked_list_head(uop_list); !linked_list_is_end(uop_list); linked_list_next(uop_list)) { uop = linked_list_get(uop_list); if (uop->ready || !rf_ready(uop)) continue; uop->ready = 1; esim_debug("uop action=\"update\", core=%d, seq=%lld, ready=1\n", uop->core, uop->di_seq); } }
void x86_uop_linked_list_dump(struct linked_list_t *uop_list, FILE *f)
{
	struct x86_uop_t *uop;

	linked_list_head(uop_list);
	while (!linked_list_is_end(uop_list))
	{
		uop = linked_list_get(uop_list);
		fprintf(f, "%3d. ", linked_list_current(uop_list));
		x86_uinst_dump(uop->uinst, f);
		fprintf(f, "\n");
		linked_list_next(uop_list);
	}
}
void X86ThreadRecoverEventQueue(X86Thread *self)
{
	X86Core *core = self->core;

	struct linked_list_t *event_queue = core->event_queue;
	struct x86_uop_t *uop;

	linked_list_head(event_queue);
	while (!linked_list_is_end(event_queue))
	{
		uop = linked_list_get(event_queue);
		if (uop->thread == self && uop->specmode)
		{
			linked_list_remove(event_queue);
			uop->in_event_queue = 0;
			x86_uop_free_if_not_queued(uop);
			continue;
		}
		linked_list_next(event_queue);
	}
}
static int issue_iq(int core, int thread, int quant)
{
	struct linked_list_t *iq = THREAD.iq;
	struct uop_t *uop;
	int lat;

	/* Debug */
	if (esim_debug_file)
		uop_lnlist_check_if_ready(iq);

	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant)
	{
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !rf_ready(uop))
		{
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'rf_ready' */

		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit, 'fu_reserve'
		 * returns a 1-cycle latency. If there is no functional unit available,
		 * 'fu_reserve' returns 0. */
		lat = fu_reserve(uop);
		if (!lat)
		{
			linked_list_next(iq);
			continue;
		}

		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		iq_remove(core, thread);

		/* Schedule inst in Event Queue */
		assert(!uop->in_eventq);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = cpu->cycle;
		uop->when = cpu->cycle + lat;
		eventq_insert(CORE.eventq, uop);

		/* Instruction issued */
		CORE.issued[uop->uinst->opcode]++;
		CORE.iq_reads++;
		CORE.rf_int_reads += uop->ph_int_idep_count;
		CORE.rf_fp_reads += uop->ph_fp_idep_count;
		THREAD.issued[uop->uinst->opcode]++;
		THREAD.iq_reads++;
		THREAD.rf_int_reads += uop->ph_int_idep_count;
		THREAD.rf_fp_reads += uop->ph_fp_idep_count;
		cpu->issued[uop->uinst->opcode]++;
		quant--;

		/* Debug */
		esim_debug("uop action=\"update\", core=%d, seq=%llu,"
			" stg_issue=1, in_iq=0, issued=1\n",
			uop->core, (long long unsigned) uop->di_seq);
	}

	return quant;
}
static int X86ThreadIssueIQ(X86Thread *self, int quant)
{
	X86Cpu *cpu = self->cpu;
	X86Core *core = self->core;

	struct linked_list_t *iq = self->iq;
	struct x86_uop_t *uop;
	int lat;

	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant)
	{
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(x86_uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !X86ThreadIsUopReady(self, uop))
		{
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'X86ThreadIsUopReady' */

		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit,
		 * 'X86CoreReserveFunctionalUnit' returns a 1-cycle latency. If there
		 * is no functional unit available, 'X86CoreReserveFunctionalUnit'
		 * returns 0. */
		lat = X86CoreReserveFunctionalUnit(core, uop);
		if (!lat)
		{
			linked_list_next(iq);
			continue;
		}

		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		X86ThreadRemoveFromIQ(self);

		/* Schedule inst in Event Queue */
		assert(!uop->in_event_queue);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = asTiming(cpu)->cycle;
		uop->when = asTiming(cpu)->cycle + lat;
		X86CoreInsertInEventQueue(core, uop);

		/* Statistics */
		core->num_issued_uinst_array[uop->uinst->opcode]++;
		core->iq_reads++;
		core->reg_file_int_reads += uop->ph_int_idep_count;
		core->reg_file_fp_reads += uop->ph_fp_idep_count;
		self->num_issued_uinst_array[uop->uinst->opcode]++;
		self->iq_reads++;
		self->reg_file_int_reads += uop->ph_int_idep_count;
		self->reg_file_fp_reads += uop->ph_fp_idep_count;
		cpu->num_issued_uinst_array[uop->uinst->opcode]++;
		if (uop->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quant--;

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			uop->id_in_core, core->id);
	}

	return quant;
}
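/*
 * After functional-unit reservation, the uop above is stamped with a
 * completion cycle ('cycle + lat') and handed to the core event queue, which
 * is kept ordered by completion time so writeback can pop finished uops from
 * the head. A minimal sketch of such an ordered insert over a toy node type
 * follows (illustrative only; X86CoreInsertInEventQueue itself operates on
 * Multi2Sim's linked_list_t).
 */
struct toy_event
{
	long long when;  /* completion cycle, plays the role of 'uop->when' */
	struct toy_event *next;
};

static void toy_event_insert(struct toy_event **head, struct toy_event *ev)
{
	struct toy_event **cur = head;

	/* Walk past every node that completes no later than 'ev', then link
	 * 'ev' in front of the first node that completes after it. */
	while (*cur && (*cur)->when <= ev->when)
		cur = &(*cur)->next;
	ev->next = *cur;
	*cur = ev;
}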
static int X86ThreadIssuePreQ(X86Thread *self, int quantum)
{
	X86Core *core = self->core;
	X86Cpu *cpu = self->cpu;

	struct linked_list_t *preq = self->preq;
	struct x86_uop_t *prefetch;

	/* Process preq */
	linked_list_head(preq);
	while (!linked_list_is_end(preq) && quantum)
	{
		/* Get element from prefetch queue. If it is not ready, go to the next one */
		prefetch = linked_list_get(preq);
		if (!prefetch->ready && !X86ThreadIsUopReady(self, prefetch))
		{
			linked_list_next(preq);
			continue;
		}

		/* Make sure the address has not been prefetched recently. This is
		 * just to avoid unnecessary memory traffic. Even though the cache
		 * will register a "hit" on redundant prefetches, it is still helpful
		 * to avoid going to the memory (cache) at all. */
		if (prefetch_history_is_redundant(core->prefetch_history,
				self->data_mod, prefetch->phy_addr))
		{
			/* Remove from queue. Do not prefetch. */
			assert(prefetch->uinst->opcode == x86_uinst_prefetch);
			X86ThreadRemovePreQ(self);
			prefetch->completed = 1;
			x86_uop_free_if_not_queued(prefetch);
			continue;
		}

		prefetch->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(self->data_mod, prefetch->phy_addr))
		{
			linked_list_next(preq);
			continue;
		}

		/* Remove from prefetch queue */
		assert(prefetch->uinst->opcode == x86_uinst_prefetch);
		X86ThreadRemovePreQ(self);

		/* Access memory system */
		mod_access(self->data_mod, mod_access_prefetch,
			prefetch->phy_addr, NULL, core->event_queue, prefetch, NULL);

		/* Record prefetched address */
		prefetch_history_record(core->prefetch_history, prefetch->phy_addr);

		/* The cache system will place the prefetch at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		prefetch->in_event_queue = 1;
		prefetch->issued = 1;
		prefetch->issue_when = asTiming(cpu)->cycle;

		/* Statistics */
		core->num_issued_uinst_array[prefetch->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += prefetch->ph_int_idep_count;
		core->reg_file_fp_reads += prefetch->ph_fp_idep_count;
		self->num_issued_uinst_array[prefetch->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += prefetch->ph_int_idep_count;
		self->reg_file_fp_reads += prefetch->ph_fp_idep_count;
		cpu->num_issued_uinst_array[prefetch->uinst->opcode]++;
		if (prefetch->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quantum--;

		/* MMU statistics */
		MMUAccessPage(cpu->mmu, prefetch->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			prefetch->id_in_core, core->id);
	}

	return quantum;
}
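/*
 * prefetch_history_is_redundant()/prefetch_history_record() act above as a
 * small filter against back-to-back prefetches of the same address. A
 * plausible minimal implementation is a circular buffer of recently
 * prefetched physical addresses; the sketch below is an assumption for
 * illustration (toy_* names are invented), not Multi2Sim's actual
 * prefetch_history code. A zero-initialized struct is assumed.
 */
#define TOY_HISTORY_SIZE 32

struct toy_prefetch_history
{
	unsigned int addr[TOY_HISTORY_SIZE];  /* recently prefetched addresses */
	int tail;  /* next slot to overwrite */
};

static int toy_history_is_redundant(struct toy_prefetch_history *h,
	unsigned int phy_addr)
{
	int i;

	/* Linear scan is fine for a buffer this small. */
	for (i = 0; i < TOY_HISTORY_SIZE; i++)
		if (h->addr[i] == phy_addr)
			return 1;
	return 0;
}

static void toy_history_record(struct toy_prefetch_history *h,
	unsigned int phy_addr)
{
	h->addr[h->tail] = phy_addr;
	h->tail = (h->tail + 1) % TOY_HISTORY_SIZE;
}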
static int X86ThreadIssueLQ(X86Thread *self, int quant)
{
	X86Core *core = self->core;
	X86Cpu *cpu = self->cpu;

	struct linked_list_t *lq = self->lq;
	struct x86_uop_t *load;
	struct mod_client_info_t *client_info;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !X86ThreadIsUopReady(self, load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(self->data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		X86ThreadRemoveFromLQ(self);

		/* Create and fill the mod_client_info_t object */
		client_info = mod_client_info_create(self->data_mod);
		client_info->prefetcher_eip = load->eip;

		/* Access memory system */
		mod_access(self->data_mod, mod_access_load,
			load->phy_addr, NULL, core->event_queue, load, client_info);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = asTiming(cpu)->cycle;

		/* Statistics */
		core->num_issued_uinst_array[load->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += load->ph_int_idep_count;
		core->reg_file_fp_reads += load->ph_fp_idep_count;
		self->num_issued_uinst_array[load->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += load->ph_int_idep_count;
		self->reg_file_fp_reads += load->ph_fp_idep_count;
		cpu->num_issued_uinst_array[load->uinst->opcode]++;
		if (load->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quant--;

		/* MMU statistics */
		MMUAccessPage(cpu->mmu, load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, core->id);
	}

	return quant;
}
/*
 * ACC call #2 - accArith
 *
 * accArith - Arithmetic calculation for the Whetstone benchmark.
 *
 * @return
 *	Returns 0 if running properly;
 *	returns -1 if an illegal input value is detected.
 */
static int x86_acc_func_accArith(struct x86_ctx_t *ctx)
{
	int core = 0;
	int thread = 0;

	struct x86_regs_t *regs = ctx->regs;
	struct mem_t *mem = ctx->mem;

	unsigned int args_ptr;
	double func_args[4];

	/* Read arguments */
	args_ptr = regs->ecx;
	printf("args_ptr = %u (0x%x)\n\n", args_ptr, args_ptr);

	func_args[0] = 1.0;
	func_args[1] = -1.0;
	func_args[2] = -1.0;
	func_args[3] = -1.0;

	/* Get function info */
	/* mem_read(mem, args_ptr, sizeof(double), func_args); */
	/* mem_read(mem, args_ptr + 4, 8, func_args[1]);
	mem_read(mem, args_ptr + 8, 8, func_args[2]);
	mem_read(mem, args_ptr + 12, 8, func_args[3]);
	mem_read(mem, args_ptr + 16, 8, func_args[4]); */

	/* func_args[0] = &args_ptr;
	func_args[1] = &args_ptr + 8;
	func_args[2] = &args_ptr + 16;
	func_args[3] = &args_ptr + 24;
	func_args[4] = &args_ptr + 60; */

	printf("*******************************\n");
	printf("In Emulation\n");
	printf("Cycle when getting into this call is %lld\n\n", x86_cpu->cycle);

	/***********************************************/
	struct linked_list_t *sq = X86_THREAD.sq;
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *store;
	struct x86_uop_t *load;
	int quant = x86_cpu_issue_width;

	/* Flood the memory system with repeated accesses for each issuable
	 * store in the store queue. Stores are deliberately not dequeued. */
	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quant)
	{
		store = linked_list_get(sq);
		printf("physical addr @ store: %u\n", store->phy_addr);
		if (!store->ready && !x86_reg_file_ready(store))
		{
			linked_list_next(sq);
			continue;
		}
		store->ready = 1;

		if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
		{
			linked_list_next(sq);
			continue;
		}

		int i = 9000;
		while (i--)
			mod_access(X86_THREAD.data_mod, mod_access_store,
				store->phy_addr, NULL, X86_CORE.event_queue, store);
		quant--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);
	}

	/* Same for the load queue. */
	quant = x86_cpu_issue_width;
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		load = linked_list_get(lq);
		printf("physical addr @ load: %u\n", load->phy_addr);
		if (!load->ready && !x86_reg_file_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		int j = 9000;
		while (j--)
			mod_access(X86_THREAD.data_mod, mod_access_load,
				load->phy_addr, NULL, X86_CORE.event_queue, load);
		quant--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, load->core);
	}
	/***********************************************/

	/* printf("\t\tfunc_args[0] = %u (0x%x)\n", func_args[0], func_args[0]);
	printf("\t\tfunc_args[1] = %u (0x%x)\n", func_args[1], func_args[1]);
	printf("\t\tfunc_args[2] = %u (0x%x)\n", func_args[2], func_args[2]);
	printf("\t\tfunc_args[3] = %u (0x%x)\n", func_args[3], func_args[3]);
	printf("\t\tfunc_args[4] = %u (0x%x)\n", func_args[4], func_args[4]);
	printf("get here 1\n"); */

	/* printf("Value:\n\t\tfunc_args[0] = %f (0x%x)\n", *func_args[0], *func_args[0]);
	printf("\t\tfunc_args[1] = %f (0x%x)\n", *func_args[1], *func_args[1]);
	printf("\t\tfunc_args[2] = %f (0x%x)\n", *func_args[2], *func_args[2]);
	printf("\t\tfunc_args[3] = %f (0x%x)\n", *func_args[3], *func_args[3]);
	printf("\t\tfunc_args[4] = %f (0x%x)\n", *func_args[4], *func_args[4]); */

	/* double *A0 = func_args[0];
	double *A1 = func_args[1];
	double *A2 = func_args[2];
	double *A3 = func_args[3];
	double *A4 = func_args[4]; */

	/* double N = *A0;
	printf("\t\tN = %f (0x%x)\n", *A0, *A0);
	printf("\t\tA1 = %f (0x%x)\n", *A1, *A1);
	printf("\t\tA2 = %f (0x%x)\n", *A2, *A2);
	printf("\t\tA3 = %f (0x%x)\n", *A3, *A3);
	printf("\t\tA4 = %f (0x%x)\n", *A4, *A4); */

	double T = 0.499975;
	printf("func_args1 = %f, func_args2 = %f, func_args3 = %f, func_args4 = %f\n",
		func_args[0], func_args[1], func_args[2], func_args[3]);

	/* func_args[0] = (func_args[0] + func_args[1] + func_args[2] - func_args[3]) * T;
	func_args[1] = (func_args[0] + func_args[1] - func_args[2] + func_args[3]) * T;
	func_args[2] = (func_args[0] - func_args[1] + func_args[2] - func_args[3]) * T;
	func_args[3] = (-func_args[0] + func_args[1] + func_args[2] + func_args[3]) * T; */

	return 0;
}
void evg_faults_insert(void)
{
	struct evg_fault_t *fault;
	struct evg_compute_unit_t *compute_unit;

	for (;;)
	{
		linked_list_head(evg_fault_list);
		fault = linked_list_get(evg_fault_list);
		if (!fault || fault->cycle > evg_gpu->cycle)
			break;

		/* Insert fault depending on fault type */
		switch (fault->type)
		{

		case evg_fault_ams:
		{
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;
			struct evg_work_item_t *work_item;

			int work_group_id;  /* in compute unit */
			int wavefront_id;  /* in compute unit */
			int value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"ams\" stack=%d am=%d bit=%d ",
				evg_gpu->cycle, fault->compute_unit_id, fault->stack_id,
				fault->active_mask_id, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group and wavefront. If wavefront ID exceeds current number, dismiss */
			work_group_id = fault->stack_id / evg_gpu->ndrange->wavefronts_per_work_group;
			wavefront_id = fault->stack_id % evg_gpu->ndrange->wavefronts_per_work_group;
			if (work_group_id >= evg_gpu_max_work_groups_per_compute_unit
				|| !compute_unit->work_groups[work_group_id])
			{
				evg_faults_debug("effect=\"wf_idle\"");
				goto end_loop;
			}
			work_group = compute_unit->work_groups[work_group_id];
			wavefront = work_group->wavefronts[wavefront_id];

			/* If active_mask_id exceeds stack top, dismiss */
			if (fault->active_mask_id > wavefront->stack_top)
			{
				evg_faults_debug("effect=\"am_idle\"");
				goto end_loop;
			}

			/* If 'bit' exceeds number of work-items in wavefront, dismiss */
			if (fault->bit >= wavefront->work_item_count)
			{
				evg_faults_debug("effect=\"wi_idle\"");
				goto end_loop;
			}

			/* Fault caused an error, show affected software entities */
			work_item = wavefront->work_items[fault->bit];
			evg_faults_debug("effect=\"error\" wg=%d wf=%d wi=%d",
				work_group->id, wavefront->id, work_item->id);

			/* Inject fault */
			value = bit_map_get(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1);
			bit_map_set(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1, !value);
			evg_fault_errors++;

			break;
		}

		case evg_fault_reg:
		{
			struct evg_opencl_kernel_t *kernel = evg_gpu->ndrange->kernel;

			int work_group_id_in_compute_unit;
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;

			int num_registers_per_work_group;

			int work_item_id_in_compute_unit;
			int work_item_id_in_work_group;
			struct evg_work_item_t *work_item;

			struct linked_list_t *fetch_queue;
			struct evg_uop_t *inst_buffer;
			struct evg_uop_t *exec_buffer;
			struct heap_t *event_queue;
			struct evg_uop_t *uop;

			int lo_reg;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"reg\" reg=%d bit=%d ",
				evg_gpu->cycle, fault->compute_unit_id,
				fault->reg_id, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			num_registers_per_work_group =
				kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used
				* kernel->local_size;
			work_group_id_in_compute_unit = fault->reg_id / num_registers_per_work_group;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get affected entities */
			work_item_id_in_compute_unit = fault->reg_id
				/ kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;
			work_item_id_in_work_group = work_item_id_in_compute_unit % kernel->local_size;
			work_item = work_group->work_items[work_item_id_in_work_group];
			wavefront = work_item->wavefront;
			lo_reg = fault->reg_id
				% kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;

			/* Fault falling between Fetch and Read stage of an instruction
			 * consuming register. This case cannot be modeled due to functional
			 * simulation skew. */
			fetch_queue = compute_unit->alu_engine.fetch_queue;
			inst_buffer = compute_unit->alu_engine.inst_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_idep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_read\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_idep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_read\"");
				goto end_loop;
			}

			/* Fault falling between Fetch and Write stage of an instruction
			 * writing on the register. The instruction will overwrite the fault,
			 * so this shouldn't cause its injection. */
			exec_buffer = compute_unit->alu_engine.exec_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			uop = exec_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			event_queue = compute_unit->alu_engine.event_queue;
			for (heap_first(event_queue, (void **) &uop); uop;
				heap_next(event_queue, (void **) &uop))
			{
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}

			/* Fault caused error */
			evg_faults_debug("effect=\"error\" ");
			evg_faults_debug("wg=%d wf=%d wi=%d lo_reg=%d ",
				work_group->id, work_item->wavefront->id,
				work_item->id, lo_reg);

			/* Insert the fault */
			if (fault->bit < 32)
				work_item->gpr[lo_reg].elem[0] ^= 1 << fault->bit;
			else if (fault->bit < 64)
				work_item->gpr[lo_reg].elem[1] ^= 1 << (fault->bit - 32);
			else if (fault->bit < 96)
				work_item->gpr[lo_reg].elem[2] ^= 1 << (fault->bit - 64);
			else
				work_item->gpr[lo_reg].elem[3] ^= 1 << (fault->bit - 96);
			evg_fault_errors++;

			break;
		}

		case evg_fault_mem:
		{
			struct evg_work_group_t *work_group;

			int work_group_id_in_compute_unit;
			unsigned char value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"mem\" byte=%d bit=%d ",
				evg_gpu->cycle, fault->compute_unit_id,
				fault->byte, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Check if there is any local memory used at all */
			if (!evg_gpu->ndrange->local_mem_top)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			work_group_id_in_compute_unit = fault->byte / evg_gpu->ndrange->local_mem_top;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Inject fault */
			evg_faults_debug("effect=\"error\" wg=%d ", work_group->id);
			mem_read(work_group->local_mem, fault->byte, 1, &value);
			value ^= 1 << fault->bit;
			mem_write(work_group->local_mem, fault->byte, 1, &value);
			evg_fault_errors++;

			break;
		}

		default:
			panic("invalid fault type");
		}

end_loop:
		/* Extract and free */
		free(fault);
		linked_list_remove(evg_fault_list);
		evg_faults_debug("\n");

		/* If all faults were inserted and no error was caused, end simulation */
		if (!linked_list_count(evg_fault_list) && !evg_fault_errors)
			esim_finish = esim_finish_evg_no_faults;
	}
}
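/*
 * The register-fault case above flips one bit of a 128-bit GPR stored as four
 * 32-bit elements, using an if/else ladder over the flat bit index. The
 * element index and in-element mask follow directly from that index; a
 * compact equivalent of the ladder, for illustration only:
 */
static void toy_flip_gpr_bit(unsigned int elem[4], int bit)
{
	/* For bit 0..127: element = bit / 32, mask = 1 << (bit % 32).
	 * E.g. bit 70 lands in elem[2] with mask 1 << 6, exactly as the
	 * 'fault->bit < 96' branch above computes. */
	elem[bit / 32] ^= 1u << (bit % 32);
}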
static int x86_cpu_issue_lq(int core, int thread, int quant)
{
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *load;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !x86_reg_file_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		x86_lq_remove(core, thread);

		/* Access memory system */
		mod_access(X86_THREAD.data_mod, mod_access_load,
			load->phy_addr, NULL, X86_CORE.event_queue, load);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = x86_cpu->cycle;

		/* Instruction issued */
		X86_CORE.issued[load->uinst->opcode]++;
		X86_CORE.lsq_reads++;
		X86_CORE.reg_file_int_reads += load->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += load->ph_fp_idep_count;
		X86_THREAD.issued[load->uinst->opcode]++;
		X86_THREAD.lsq_reads++;
		X86_THREAD.reg_file_int_reads += load->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += load->ph_fp_idep_count;
		x86_cpu->issued[load->uinst->opcode]++;
		quant--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, load->core);
	}

	return quant;
}
static int x86_cpu_issue_iq(int core, int thread, int quant)
{
	struct linked_list_t *iq = X86_THREAD.iq;
	struct x86_uop_t *uop;
	int lat;

	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant)
	{
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(x86_uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !x86_reg_file_ready(uop))
		{
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'x86_reg_file_ready' */

		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit, 'x86_fu_reserve'
		 * returns a 1-cycle latency. If there is no functional unit available,
		 * 'x86_fu_reserve' returns 0. */
		lat = x86_fu_reserve(uop);
		if (!lat)
		{
			linked_list_next(iq);
			continue;
		}

		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		x86_iq_remove(core, thread);

		/* Schedule inst in Event Queue */
		assert(!uop->in_event_queue);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = x86_cpu->cycle;
		uop->when = x86_cpu->cycle + lat;
		x86_event_queue_insert(X86_CORE.event_queue, uop);

		/* Instruction issued */
		X86_CORE.issued[uop->uinst->opcode]++;
		X86_CORE.iq_reads++;
		X86_CORE.reg_file_int_reads += uop->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += uop->ph_fp_idep_count;
		X86_THREAD.issued[uop->uinst->opcode]++;
		X86_THREAD.iq_reads++;
		X86_THREAD.reg_file_int_reads += uop->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += uop->ph_fp_idep_count;
		x86_cpu->issued[uop->uinst->opcode]++;
		quant--;

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			uop->id_in_core, uop->core);
	}

	return quant;
}
void evg_isa_write_task_commit(struct evg_work_item_t *work_item)
{
	struct linked_list_t *task_list = work_item->write_task_list;
	struct evg_wavefront_t *wavefront = work_item->wavefront;
	struct evg_work_group_t *work_group = work_item->work_group;

	struct evg_isa_write_task_t *wt;
	struct evg_inst_t *inst;

	/* Process first tasks of type:
	 * - EVG_ISA_WRITE_TASK_WRITE_DEST
	 * - EVG_ISA_WRITE_TASK_WRITE_LDS */
	for (linked_list_head(task_list); !linked_list_is_end(task_list); )
	{
		/* Get task */
		wt = linked_list_get(task_list);
		assert(wt->work_item == work_item);
		inst = wt->inst;

		switch (wt->kind)
		{

		case EVG_ISA_WRITE_TASK_WRITE_DEST:
		{
			if (wt->write_mask)
				evg_isa_write_gpr(work_item, wt->gpr, wt->rel, wt->chan, wt->value);
			work_item->pv.elem[wt->inst->alu] = wt->value;

			/* Debug */
			if (evg_isa_debugging())
			{
				evg_isa_debug(" i%d:%s", work_item->id,
					map_value(&evg_pv_map, wt->inst->alu));
				if (wt->write_mask)
				{
					evg_isa_debug(",");
					evg_inst_dump_gpr(wt->gpr, wt->rel, wt->chan, 0,
						debug_file(evg_isa_debug_category));
				}
				evg_isa_debug("<=");
				gpu_isa_dest_value_dump(inst, &wt->value,
					debug_file(evg_isa_debug_category));
			}

			break;
		}

		case EVG_ISA_WRITE_TASK_WRITE_LDS:
		{
			struct mem_t *local_mem;
			union evg_reg_t lds_value;

			local_mem = work_group->local_mem;
			assert(local_mem);
			assert(wt->lds_value_size);
			mem_write(local_mem, wt->lds_addr, wt->lds_value_size, &wt->lds_value);

			/* Debug */
			lds_value.as_uint = wt->lds_value;
			evg_isa_debug(" i%d:LDS[0x%x]<=(%u,%gf) (%d bytes)",
				work_item->id, wt->lds_addr, lds_value.as_uint,
				lds_value.as_float, (int) wt->lds_value_size);

			break;
		}

		default:
			linked_list_next(task_list);
			continue;
		}

		/* Done with this task */
		repos_free_object(evg_isa_write_task_repos, wt);
		linked_list_remove(task_list);
	}

	/* Process PUSH_BEFORE, PRED_SET */
	for (linked_list_head(task_list); !linked_list_is_end(task_list); )
	{
		/* Get task */
		wt = linked_list_get(task_list);
		inst = wt->inst;

		/* Process */
		switch (wt->kind)
		{

		case EVG_ISA_WRITE_TASK_PUSH_BEFORE:
		{
			if (!wavefront->push_before_done)
				evg_wavefront_stack_push(wavefront);
			wavefront->push_before_done = 1;
			break;
		}

		case EVG_ISA_WRITE_TASK_SET_PRED:
		{
			int update_pred = EVG_ALU_WORD1_OP2.update_pred;
			int update_exec_mask = EVG_ALU_WORD1_OP2.update_exec_mask;

			assert(inst->info->fmt[1] == EVG_FMT_ALU_WORD1_OP2);
			if (update_pred)
				evg_work_item_set_pred(work_item, wt->cond);
			if (update_exec_mask)
				evg_work_item_set_active(work_item, wt->cond);

			/* Debug */
			if (debug_status(evg_isa_debug_category))
			{
				if (update_pred && update_exec_mask)
					evg_isa_debug(" i%d:act/pred<=%d", work_item->id, wt->cond);
				else if (update_pred)
					evg_isa_debug(" i%d:pred=%d", work_item->id, wt->cond);
				else if (update_exec_mask)
					evg_isa_debug(" i%d:pred=%d", work_item->id, wt->cond);
			}
			break;
		}

		default:
			abort();
		}

		/* Done with task */
		repos_free_object(evg_isa_write_task_repos, wt);
		linked_list_remove(task_list);
	}

	/* List should be empty */
	assert(!linked_list_count(task_list));
}
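/*
 * evg_isa_write_task_commit() drains the task list in two passes so that all
 * register/LDS writes land before stack pushes and predicate updates,
 * regardless of the order in which tasks were queued during execution. A
 * stripped-down sketch of that two-pass drain over a toy task array follows
 * (toy_* names are invented for illustration; the real code walks a
 * linked_list_t and frees tasks through a repository).
 */
enum toy_task_kind
{
	TOY_WRITE,  /* register/LDS write: must commit first */
	TOY_PRED    /* predicate/stack update: must commit second */
};

struct toy_task
{
	enum toy_task_kind kind;
	int done;
};

static void toy_commit(struct toy_task *tasks, int count)
{
	int pass;
	int i;

	/* Pass 0 applies writes, pass 1 applies predicate updates. */
	for (pass = 0; pass < 2; pass++)
		for (i = 0; i < count; i++)
		{
			struct toy_task *t = &tasks[i];
			int match = (pass == 0) ? (t->kind == TOY_WRITE)
				: (t->kind == TOY_PRED);
			if (t->done || !match)
				continue;
			/* ...apply the task's effect here... */
			t->done = 1;
		}
}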
static int issue_lq(int core, int thread, int quant)
{
	struct linked_list_t *lq = THREAD.lq;
	struct uop_t *load;

	/* Debug */
	if (esim_debug_file)
		uop_lnlist_check_if_ready(lq);

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !rf_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		lq_remove(core, thread);

		/* Access memory system */
		mod_access(THREAD.data_mod, mod_entry_cpu, mod_access_read,
			load->phy_addr, NULL, CORE.eventq, load);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_eventq" to
		 * prevent the uop from being freed. */
		load->in_eventq = 1;
		load->issued = 1;
		load->issue_when = cpu->cycle;

		/* Instruction issued */
		CORE.issued[load->uinst->opcode]++;
		CORE.lsq_reads++;
		CORE.rf_int_reads += load->ph_int_idep_count;
		CORE.rf_fp_reads += load->ph_fp_idep_count;
		THREAD.issued[load->uinst->opcode]++;
		THREAD.lsq_reads++;
		THREAD.rf_int_reads += load->ph_int_idep_count;
		THREAD.rf_fp_reads += load->ph_fp_idep_count;
		cpu->issued[load->uinst->opcode]++;
		quant--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Debug */
		esim_debug("uop action=\"update\", core=%d, seq=%llu,"
			" stg_issue=1, in_lsq=0, issued=1\n",
			load->core, (long long unsigned) load->di_seq);
	}

	return quant;
}
static int x86_acc_func_accDTW(struct x86_ctx_t *ctx)
{
	struct x86_regs_t *regs = ctx->regs;
	struct mem_t *mem = ctx->mem;

	int core = 0;
	int thread = 0;

	unsigned int args_ptr;
	struct arglist func_args;

	/* Read arguments */
	args_ptr = regs->ecx;

	/* Get function info */
	mem_read(mem, args_ptr, sizeof(func_args), &func_args);
	printf("\t\t**sample1 = %p (%p)\n", func_args.sample1, &(func_args.sample1[0][0]));
	printf("\t\tlength1 = %u (%p)\n", func_args.length1, &func_args.length1);
	printf("\t\t**sample2 = %p (%p)\n", func_args.sample2, &(func_args.sample2[0][0]));
	printf("\t\tlength2 = %u (%p)\n", func_args.length2, &func_args.length2);
	printf("\t\ti = %u (%p)\n", func_args.i, &func_args.i);
	printf("\t\tj = %u (%p)\n", func_args.j, &func_args.j);
	printf("\t\t*table = %p (%p)\n", func_args.table, &(func_args.table[0]));

	/***********************************************/
#define L2ONLY
#define WITHACC

#ifdef L2ONLY
	/* Point data accesses straight at the L2 module while the ACC portion
	 * of this call runs. */
	printf("Cache Behavior Simulation\n");
	char *mod_name = "mod-l2-0";
	X86_THREAD.data_mod = mem_system_get_mod(mod_name);
#endif

#ifdef WITHACC
	struct linked_list_t *sq = X86_THREAD.sq;
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *store;
	struct x86_uop_t *load;
	int quant = x86_cpu_issue_width;

	unsigned int count1, count2;
	for (count1 = 0; count1 < 124; count1++)
	{
		for (count2 = 0; count2 < 124; count2++)
		{
			/* Issue repeated accesses for each issuable store in the
			 * store queue. Stores are deliberately not dequeued. */
			linked_list_head(sq);
			while (!linked_list_is_end(sq) && quant)
			{
				store = linked_list_get(sq);
				printf("\n\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n\n");
				printf("physical addr @ store: %u\n", store->phy_addr);
				assert(store->uinst->opcode == x86_uinst_store);
				if (!store->ready && !x86_reg_file_ready(store))
				{
					linked_list_next(sq);
					continue;
				}
				store->ready = 1;

				printf("data module kind: %d\n", X86_THREAD.data_mod->kind);
				printf("data module level: %d\n", X86_THREAD.data_mod->level);
				printf("data module name: %s\n", X86_THREAD.data_mod->name);
				printf("data module cache name: %s\n", X86_THREAD.data_mod->cache->name);

				if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
				{
					linked_list_next(sq);
					continue;
				}

				int i = 3;
				while (i--)
				{
					printf("Store Debug Point 6\n");
					mod_access(X86_THREAD.data_mod, mod_access_store,
						store->phy_addr, NULL, X86_CORE.event_queue, store);
				}
				quant--;

				/* MMU statistics */
				if (*mmu_report_file_name)
					mmu_access_page(store->phy_addr, mmu_access_write);
			}

			/* Same for the load queue. */
			quant = x86_cpu_issue_width;
			linked_list_head(lq);
			while (!linked_list_is_end(lq) && quant)
			{
				load = linked_list_get(lq);
				printf("physical addr @ load: %u\n", load->phy_addr);
				assert(load->uinst->opcode == x86_uinst_load);
				if (!load->ready && !x86_reg_file_ready(load))
				{
					printf("load debug point 1\n");
					linked_list_next(lq);
					continue;
				}
				load->ready = 1;

				if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
				{
					printf("load debug point 2\n");
					linked_list_next(lq);
					continue;
				}

				int j = 1;
				while (j--)
				{
					printf("load debug point 3\n");
					mod_access(X86_THREAD.data_mod, mod_access_load,
						load->phy_addr, NULL, X86_CORE.event_queue, load);
				}
				quant--;

				/* MMU statistics */
				if (*mmu_report_file_name)
					mmu_access_page(load->phy_addr, mmu_access_read);

				/* Trace */
				x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
					load->id_in_core, load->core);
			}
		}
	}
#endif

#ifdef L2ONLY
	/* Restore the L1 data module. */
	mod_name = "mod-dl1-0";
	X86_THREAD.data_mod = mem_system_get_mod(mod_name);
#endif
	/***********************************************/

	int ret = DTWdistance(func_args.sample1, func_args.length1,
		func_args.sample2, func_args.length2,
		func_args.i, func_args.j, func_args.table);
	return ret;
}