static void tree_output()
{
	Symbol **symbols, *main_sym;
	size_t i, num;
	cflow_depmap_t depmap;

	/* Collect functions and assign them ordinal numbers */
	num = collect_functions(&symbols);
	for (i = 0; i < num; i++)
		symbols[i]->ord = i;

	/* Create a dependency matrix */
	depmap = depmap_alloc(num);
	for (i = 0; i < num; i++) {
		if (symbols[i]->callee) {
			struct linked_list_entry *p;

			for (p = linked_list_head(symbols[i]->callee); p; p = p->next) {
				Symbol *s = (Symbol*) p->data;
				if (symbol_is_function(s))
					depmap_set(depmap, i, ((Symbol*)p->data)->ord);
			}
		}
	}

	depmap_tc(depmap);

	/* Mark recursive calls */
	for (i = 0; i < num; i++)
		if (depmap_isset(depmap, i, i))
			symbols[i]->recursive = 1;
	free(depmap);
	free(symbols);

	/* Collect and sort all symbols */
	num = collect_symbols(&symbols, is_var, 0);
	qsort(symbols, num, sizeof(*symbols), compare);

	/* Produce output */
	begin();

	if (reverse_tree) {
		for (i = 0; i < num; i++) {
			inverted_tree(0, 0, symbols[i]);
			separator();
		}
	} else {
		main_sym = lookup(start_name);
		if (main_sym) {
			direct_tree(0, 0, main_sym);
			separator();
		} else {
			for (i = 0; i < num; i++) {
				if (symbols[i]->callee == NULL)
					continue;
				direct_tree(0, 0, symbols[i]);
				separator();
			}
		}
	}

	end();

	free(symbols);
}
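/*
 * tree_output() above marks recursive functions by building a callee
 * dependency matrix, taking its transitive closure (depmap_tc), and then
 * checking the diagonal: if function i can reach itself, it is recursive,
 * either directly or through a call cycle.  The sketch below shows that
 * idea with a plain boolean matrix and a Floyd-Warshall-style closure;
 * the 'dep' array and helper here are hypothetical illustrations written
 * for this example, not cflow's actual cflow_depmap_t implementation.
 */
#include <stdio.h>

#define N 3  /* functions: 0 = main, 1 = foo, 2 = bar */

static void transitive_closure(int dep[N][N])
{
	/* dep[i][j] becomes 1 if j is reachable from i through any chain of calls */
	for (int k = 0; k < N; k++)
		for (int i = 0; i < N; i++)
			for (int j = 0; j < N; j++)
				if (dep[i][k] && dep[k][j])
					dep[i][j] = 1;
}

int main(void)
{
	/* Direct calls: main -> foo, foo -> bar, bar -> foo */
	int dep[N][N] = {
		{ 0, 1, 0 },
		{ 0, 0, 1 },
		{ 0, 1, 0 },
	};

	transitive_closure(dep);

	/* A set diagonal entry means the function can reach itself */
	for (int i = 0; i < N; i++)
		if (dep[i][i])
			printf("function %d is recursive\n", i);
	return 0;
}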
static int X86ThreadIssuePreQ(X86Thread *self, int quantum)
{
	X86Core *core = self->core;
	X86Cpu *cpu = self->cpu;

	struct linked_list_t *preq = self->preq;
	struct x86_uop_t *prefetch;

	/* Process preq */
	linked_list_head(preq);
	while (!linked_list_is_end(preq) && quantum)
	{
		/* Get element from prefetch queue. If it is not ready, go to the next one */
		prefetch = linked_list_get(preq);
		if (!prefetch->ready && !X86ThreadIsUopReady(self, prefetch))
		{
			linked_list_next(preq);
			continue;
		}

		/* Make sure it has not been prefetched recently. This is just to
		 * avoid unnecessary memory traffic. Even though the cache will
		 * register a "hit" on redundant prefetches, it is still helpful
		 * to avoid going to the memory (cache). */
		if (prefetch_history_is_redundant(core->prefetch_history,
				self->data_mod, prefetch->phy_addr))
		{
			/* Remove from queue. Do not prefetch. */
			assert(prefetch->uinst->opcode == x86_uinst_prefetch);
			X86ThreadRemovePreQ(self);
			prefetch->completed = 1;
			x86_uop_free_if_not_queued(prefetch);
			continue;
		}

		prefetch->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(self->data_mod, prefetch->phy_addr))
		{
			linked_list_next(preq);
			continue;
		}

		/* Remove from prefetch queue */
		assert(prefetch->uinst->opcode == x86_uinst_prefetch);
		X86ThreadRemovePreQ(self);

		/* Access memory system */
		mod_access(self->data_mod, mod_access_prefetch, prefetch->phy_addr,
			NULL, core->event_queue, prefetch, NULL);

		/* Record prefetched address */
		prefetch_history_record(core->prefetch_history, prefetch->phy_addr);

		/* The cache system will place the prefetch at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		prefetch->in_event_queue = 1;
		prefetch->issued = 1;
		prefetch->issue_when = asTiming(cpu)->cycle;

		/* Statistics */
		core->num_issued_uinst_array[prefetch->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += prefetch->ph_int_idep_count;
		core->reg_file_fp_reads += prefetch->ph_fp_idep_count;
		self->num_issued_uinst_array[prefetch->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += prefetch->ph_int_idep_count;
		self->reg_file_fp_reads += prefetch->ph_fp_idep_count;
		cpu->num_issued_uinst_array[prefetch->uinst->opcode]++;
		if (prefetch->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quantum--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(prefetch->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			prefetch->id_in_core, core->id);
	}

	return quantum;
}
static int X86ThreadIssueIQ(X86Thread *self, int quant)
{
	X86Cpu *cpu = self->cpu;
	X86Core *core = self->core;

	struct linked_list_t *iq = self->iq;
	struct x86_uop_t *uop;
	int lat;

	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant)
	{
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(x86_uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !X86ThreadIsUopReady(self, uop))
		{
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'X86ThreadIsUopReady' */

		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit,
		 * 'X86CoreReserveFunctionalUnit' returns 1 cycle latency.
		 * If there is no functional unit available,
		 * 'X86CoreReserveFunctionalUnit' returns 0. */
		lat = X86CoreReserveFunctionalUnit(core, uop);
		if (!lat)
		{
			linked_list_next(iq);
			continue;
		}

		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		X86ThreadRemoveFromIQ(self);

		/* Schedule inst in Event Queue */
		assert(!uop->in_event_queue);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = asTiming(cpu)->cycle;
		uop->when = asTiming(cpu)->cycle + lat;
		X86CoreInsertInEventQueue(core, uop);

		/* Statistics */
		core->num_issued_uinst_array[uop->uinst->opcode]++;
		core->iq_reads++;
		core->reg_file_int_reads += uop->ph_int_idep_count;
		core->reg_file_fp_reads += uop->ph_fp_idep_count;
		self->num_issued_uinst_array[uop->uinst->opcode]++;
		self->iq_reads++;
		self->reg_file_int_reads += uop->ph_int_idep_count;
		self->reg_file_fp_reads += uop->ph_fp_idep_count;
		cpu->num_issued_uinst_array[uop->uinst->opcode]++;
		if (uop->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quant--;

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			uop->id_in_core, core->id);
	}

	return quant;
}
static int x86_cpu_issue_lq(int core, int thread, int quant)
{
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *load;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !x86_reg_file_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		x86_lq_remove(core, thread);

		/* Access memory system */
		mod_access(X86_THREAD.data_mod, mod_access_load,
			load->phy_addr, NULL, X86_CORE.event_queue, load);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = x86_cpu->cycle;

		/* Instruction issued */
		X86_CORE.issued[load->uinst->opcode]++;
		X86_CORE.lsq_reads++;
		X86_CORE.reg_file_int_reads += load->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += load->ph_fp_idep_count;
		X86_THREAD.issued[load->uinst->opcode]++;
		X86_THREAD.lsq_reads++;
		X86_THREAD.reg_file_int_reads += load->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += load->ph_fp_idep_count;
		x86_cpu->issued[load->uinst->opcode]++;
		quant--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, load->core);
	}

	return quant;
}
static int X86ThreadIssueLQ(X86Thread *self, int quant)
{
	X86Core *core = self->core;
	X86Cpu *cpu = self->cpu;

	struct linked_list_t *lq = self->lq;
	struct x86_uop_t *load;
	struct mod_client_info_t *client_info;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !X86ThreadIsUopReady(self, load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(self->data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		X86ThreadRemoveFromLQ(self);

		/* Create and fill the mod_client_info_t object */
		client_info = mod_client_info_create(self->data_mod);
		client_info->prefetcher_eip = load->eip;

		/* Access memory system */
		mod_access(self->data_mod, mod_access_load,
			load->phy_addr, NULL, core->event_queue, load, client_info);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = asTiming(cpu)->cycle;

		/* Statistics */
		core->num_issued_uinst_array[load->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += load->ph_int_idep_count;
		core->reg_file_fp_reads += load->ph_fp_idep_count;
		self->num_issued_uinst_array[load->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += load->ph_int_idep_count;
		self->reg_file_fp_reads += load->ph_fp_idep_count;
		cpu->num_issued_uinst_array[load->uinst->opcode]++;
		if (load->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quant--;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, core->id);
	}

	return quant;
}
static int x86_cpu_issue_iq(int core, int thread, int quant)
{
	struct linked_list_t *iq = X86_THREAD.iq;
	struct x86_uop_t *uop;
	int lat;

	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant)
	{
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(x86_uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !x86_reg_file_ready(uop))
		{
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'x86_reg_file_ready' */

		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit, 'x86_fu_reserve'
		 * returns 1 cycle latency. If there is no functional unit available,
		 * 'x86_fu_reserve' returns 0. */
		lat = x86_fu_reserve(uop);
		if (!lat)
		{
			linked_list_next(iq);
			continue;
		}

		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		x86_iq_remove(core, thread);

		/* Schedule inst in Event Queue */
		assert(!uop->in_event_queue);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = x86_cpu->cycle;
		uop->when = x86_cpu->cycle + lat;
		x86_event_queue_insert(X86_CORE.event_queue, uop);

		/* Instruction issued */
		X86_CORE.issued[uop->uinst->opcode]++;
		X86_CORE.iq_reads++;
		X86_CORE.reg_file_int_reads += uop->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += uop->ph_fp_idep_count;
		X86_THREAD.issued[uop->uinst->opcode]++;
		X86_THREAD.iq_reads++;
		X86_THREAD.reg_file_int_reads += uop->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += uop->ph_fp_idep_count;
		x86_cpu->issued[uop->uinst->opcode]++;
		quant--;

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			uop->id_in_core, uop->core);
	}

	return quant;
}
void evg_isa_write_task_commit(struct evg_work_item_t *work_item)
{
	struct linked_list_t *task_list = work_item->write_task_list;
	struct evg_wavefront_t *wavefront = work_item->wavefront;
	struct evg_work_group_t *work_group = work_item->work_group;

	struct evg_isa_write_task_t *wt;
	struct evg_inst_t *inst;

	/* Process first tasks of type:
	 *  - EVG_ISA_WRITE_TASK_WRITE_DEST
	 *  - EVG_ISA_WRITE_TASK_WRITE_LDS */
	for (linked_list_head(task_list); !linked_list_is_end(task_list); )
	{
		/* Get task */
		wt = linked_list_get(task_list);
		assert(wt->work_item == work_item);
		inst = wt->inst;

		switch (wt->kind)
		{

		case EVG_ISA_WRITE_TASK_WRITE_DEST:
		{
			if (wt->write_mask)
				evg_isa_write_gpr(work_item, wt->gpr, wt->rel, wt->chan, wt->value);
			work_item->pv.elem[wt->inst->alu] = wt->value;

			/* Debug */
			if (evg_isa_debugging())
			{
				evg_isa_debug(" i%d:%s", work_item->id,
					map_value(&evg_pv_map, wt->inst->alu));
				if (wt->write_mask)
				{
					evg_isa_debug(",");
					evg_inst_dump_gpr(wt->gpr, wt->rel, wt->chan, 0,
						debug_file(evg_isa_debug_category));
				}
				evg_isa_debug("<=");
				gpu_isa_dest_value_dump(inst, &wt->value,
					debug_file(evg_isa_debug_category));
			}

			break;
		}

		case EVG_ISA_WRITE_TASK_WRITE_LDS:
		{
			struct mem_t *local_mem;
			union evg_reg_t lds_value;

			local_mem = work_group->local_mem;
			assert(local_mem);
			assert(wt->lds_value_size);
			mem_write(local_mem, wt->lds_addr, wt->lds_value_size, &wt->lds_value);

			/* Debug */
			lds_value.as_uint = wt->lds_value;
			evg_isa_debug(" i%d:LDS[0x%x]<=(%u,%gf) (%d bytes)", work_item->id,
				wt->lds_addr, lds_value.as_uint, lds_value.as_float,
				(int) wt->lds_value_size);
			break;
		}

		default:
			linked_list_next(task_list);
			continue;
		}

		/* Done with this task */
		repos_free_object(evg_isa_write_task_repos, wt);
		linked_list_remove(task_list);
	}

	/* Process PUSH_BEFORE, PRED_SET */
	for (linked_list_head(task_list); !linked_list_is_end(task_list); )
	{
		/* Get task */
		wt = linked_list_get(task_list);
		inst = wt->inst;

		/* Process */
		switch (wt->kind)
		{

		case EVG_ISA_WRITE_TASK_PUSH_BEFORE:
		{
			if (!wavefront->push_before_done)
				evg_wavefront_stack_push(wavefront);
			wavefront->push_before_done = 1;
			break;
		}

		case EVG_ISA_WRITE_TASK_SET_PRED:
		{
			int update_pred = EVG_ALU_WORD1_OP2.update_pred;
			int update_exec_mask = EVG_ALU_WORD1_OP2.update_exec_mask;

			assert(inst->info->fmt[1] == EVG_FMT_ALU_WORD1_OP2);
			if (update_pred)
				evg_work_item_set_pred(work_item, wt->cond);
			if (update_exec_mask)
				evg_work_item_set_active(work_item, wt->cond);

			/* Debug */
			if (debug_status(evg_isa_debug_category))
			{
				if (update_pred && update_exec_mask)
					evg_isa_debug(" i%d:act/pred<=%d", work_item->id, wt->cond);
				else if (update_pred)
					evg_isa_debug(" i%d:pred=%d", work_item->id, wt->cond);
				else if (update_exec_mask)
					evg_isa_debug(" i%d:pred=%d", work_item->id, wt->cond);
			}
			break;
		}

		default:
			abort();
		}

		/* Done with task */
		repos_free_object(evg_isa_write_task_repos, wt);
		linked_list_remove(task_list);
	}

	/* List should be empty */
	assert(!linked_list_count(task_list));
}
static void evg_tex_engine_fetch(EvgComputeUnit *compute_unit)
{
	EvgGpu *gpu = compute_unit->gpu;

	struct linked_list_t *pending_queue = compute_unit->tex_engine.pending_queue;
	struct linked_list_t *finished_queue = compute_unit->tex_engine.finished_queue;

	EvgWavefront *wavefront;
	struct evg_uop_t *cf_uop, *uop;
	struct evg_work_item_uop_t *work_item_uop;
	EvgInst *inst;
	int inst_num;

	EvgWorkItem *work_item;
	int work_item_id;

	char str[MAX_LONG_STRING_SIZE];
	char str_trimmed[MAX_LONG_STRING_SIZE];

	/* Get wavefront to fetch from */
	linked_list_head(pending_queue);
	cf_uop = linked_list_get(pending_queue);
	if (!cf_uop)
		return;
	wavefront = cf_uop->wavefront;
	assert(wavefront->clause_kind == EvgInstClauseTEX);

	/* If fetch queue is full, cannot fetch until space is made */
	if (compute_unit->tex_engine.fetch_queue_length >= evg_gpu_tex_engine_fetch_queue_size)
		return;

	/* Emulate instruction and create uop */
	inst_num = (wavefront->clause_buf - wavefront->clause_buf_start) / 16;
	EvgWavefrontExecute(wavefront);
	inst = wavefront->tex_inst;
	uop = evg_uop_create(compute_unit);
	uop->wavefront = wavefront;
	uop->work_group = wavefront->work_group;
	uop->cf_uop = cf_uop;
	uop->id_in_compute_unit = compute_unit->gpu_uop_id_counter++;
	uop->last = wavefront->clause_kind != EvgInstClauseTEX;
	uop->global_mem_read = wavefront->global_mem_read;
	uop->global_mem_write = wavefront->global_mem_write;
	uop->vliw_slots = 1;

	/* If TEX clause finished, extract CF uop from 'pending_queue' and
	 * insert it into 'finished_queue'. */
	if (uop->last)
	{
		linked_list_remove(pending_queue);
		linked_list_add(finished_queue, cf_uop);
	}

	/* If instruction is a global memory read (should be), record addresses */
	if (uop->global_mem_read)
	{
		assert((inst->info->flags & EvgInstFlagMemRead));
		EVG_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id)
		{
			work_item = gpu->ndrange->work_items[work_item_id];
			work_item_uop = &uop->work_item_uop[work_item->id_in_wavefront];
			work_item_uop->global_mem_access_addr = work_item->global_mem_access_addr;
			work_item_uop->global_mem_access_size = work_item->global_mem_access_size;
		}
void evg_faults_init(void)
{
	FILE *f;
	char line[MAX_STRING_SIZE];
	char *line_ptr;

	struct evg_fault_t *fault;
	int line_num;
	long long last_cycle;

	evg_fault_list = linked_list_create();
	if (!*evg_faults_file_name)
		return;

	f = fopen(evg_faults_file_name, "rt");
	if (!f)
		fatal("%s: cannot open file", evg_faults_file_name);

	line_num = 0;
	last_cycle = 0;
	while (!feof(f))
	{
		const char *delim = " ";

		/* Read a line */
		line_num++;
		line_ptr = fgets(line, MAX_STRING_SIZE, f);
		if (!line_ptr)
			break;

		/* Allocate new fault */
		fault = calloc(1, sizeof(struct evg_fault_t));
		if (!fault)
			fatal("%s: out of memory", __FUNCTION__);

		/* Read <cycle> field */
		line_ptr = strtok(line_ptr, delim);
		if (!line_ptr)
			goto wrong_format;
		fault->cycle = atoll(line_ptr);
		if (fault->cycle < 1)
			fatal("%s: line %d: lowest possible cycle is 1",
				evg_faults_file_name, line_num);
		if (fault->cycle < last_cycle)
			fatal("%s: line %d: cycles must be ordered",
				evg_faults_file_name, line_num);

		/* <fault> - Type of fault */
		line_ptr = strtok(NULL, delim);
		if (!line_ptr)
			goto wrong_format;
		if (!strcmp(line_ptr, "ams"))
			fault->type = evg_fault_ams;
		else if (!strcmp(line_ptr, "reg"))
			fault->type = evg_fault_reg;
		else if (!strcmp(line_ptr, "mem"))
			fault->type = evg_fault_mem;
		else
			fatal("%s: line %d: invalid value for <fault> ('%s')",
				evg_faults_file_name, line_num, line_ptr);

		/* <cu_id> - Compute unit */
		line_ptr = strtok(NULL, delim);
		if (!line_ptr)
			goto wrong_format;
		fault->compute_unit_id = atoi(line_ptr);
		if (fault->compute_unit_id >= evg_gpu_num_compute_units || fault->compute_unit_id < 0)
			fatal("%s: line %d: invalid compute unit ID",
				evg_faults_file_name, line_num);

		/* Analyze rest of the line depending on fault type */
		switch (fault->type)
		{

		case evg_fault_ams:

			/* <stack_id> - Stack ID */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->stack_id = atoi(line_ptr);
			if (fault->stack_id >= evg_gpu_max_wavefronts_per_compute_unit)
				fatal("%s: line %d: invalid stack ID",
					evg_faults_file_name, line_num);

			/* <am_id> - Active mask ID */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->active_mask_id = atoi(line_ptr);
			if (fault->active_mask_id >= EVG_MAX_STACK_SIZE)
				fatal("%s: line %d: invalid active mask ID",
					evg_faults_file_name, line_num);

			/* <bit> */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->bit = atoi(line_ptr);
			if (fault->bit >= evg_emu_wavefront_size)
				fatal("%s: line %d: invalid bit index",
					evg_faults_file_name, line_num);

			/* No more tokens */
			if (strtok(NULL, delim))
				fatal("%s: line %d: too many arguments",
					evg_faults_file_name, line_num);
			break;

		case evg_fault_reg:

			/* <reg_id> - Register ID */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->reg_id = atoi(line_ptr);
			if (fault->reg_id >= evg_gpu_num_registers || fault->reg_id < 0)
				fatal("%s: line %d: invalid register ID",
					evg_faults_file_name, line_num);

			/* <bit> */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->bit = atoi(line_ptr);
			if (fault->bit < 0 || fault->bit >= 128)
				fatal("%s: line %d: invalid bit index",
					evg_faults_file_name, line_num);
			break;

		case evg_fault_mem:

			/* <byte> - Byte position in local memory */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->byte = atoi(line_ptr);
			if (fault->byte >= evg_gpu_local_mem_size || fault->byte < 0)
				fatal("%s: line %d: invalid byte position",
					evg_faults_file_name, line_num);

			/* <bit> - Bit position */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->bit = atoi(line_ptr);
			if (fault->bit > 7 || fault->bit < 0)
				fatal("%s: line %d: invalid bit position",
					evg_faults_file_name, line_num);
			break;
		}

		/* Insert fault in fault list */
		linked_list_out(evg_fault_list);
		linked_list_insert(evg_fault_list, fault);
		last_cycle = fault->cycle;
		continue;

wrong_format:
		fatal("%s: line %d: not enough arguments",
			evg_faults_file_name, line_num);
	}

	linked_list_head(evg_fault_list);
}
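/*
 * Hypothetical example of a faults file accepted by evg_faults_init()
 * above, inferred from its parsing code (one fault per line, fields
 * separated by spaces, cycles in non-decreasing order).  The concrete
 * values are made up for illustration only:
 *
 *   100 ams 0 2 1 13      <cycle> ams <cu_id> <stack_id> <am_id> <bit>
 *   250 reg 3 47 5        <cycle> reg <cu_id> <reg_id> <bit>
 *   400 mem 1 1024 6      <cycle> mem <cu_id> <byte> <bit>
 */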
void evg_faults_insert(void)
{
	struct evg_fault_t *fault;
	struct evg_compute_unit_t *compute_unit;

	for (;;)
	{
		linked_list_head(evg_fault_list);
		fault = linked_list_get(evg_fault_list);
		if (!fault || fault->cycle > evg_gpu->cycle)
			break;

		/* Insert fault depending on fault type */
		switch (fault->type)
		{

		case evg_fault_ams:
		{
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;
			struct evg_work_item_t *work_item;

			int work_group_id;  /* in compute unit */
			int wavefront_id;  /* in compute unit */
			int value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"ams\" stack=%d am=%d bit=%d ",
				evg_gpu->cycle, fault->compute_unit_id,
				fault->stack_id, fault->active_mask_id, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group and wavefront. If wavefront ID exceeds current number, dismiss */
			work_group_id = fault->stack_id / evg_gpu->ndrange->wavefronts_per_work_group;
			wavefront_id = fault->stack_id % evg_gpu->ndrange->wavefronts_per_work_group;
			if (work_group_id >= evg_gpu_max_work_groups_per_compute_unit
				|| !compute_unit->work_groups[work_group_id])
			{
				evg_faults_debug("effect=\"wf_idle\"");
				goto end_loop;
			}
			work_group = compute_unit->work_groups[work_group_id];
			wavefront = work_group->wavefronts[wavefront_id];

			/* If active_mask_id exceeds stack top, dismiss */
			if (fault->active_mask_id > wavefront->stack_top)
			{
				evg_faults_debug("effect=\"am_idle\"");
				goto end_loop;
			}

			/* If 'bit' exceeds number of work-items in wavefront, dismiss */
			if (fault->bit >= wavefront->work_item_count)
			{
				evg_faults_debug("effect=\"wi_idle\"");
				goto end_loop;
			}

			/* Fault caused an error, show affected software entities */
			work_item = wavefront->work_items[fault->bit];
			evg_faults_debug("effect=\"error\" wg=%d wf=%d wi=%d",
				work_group->id, wavefront->id, work_item->id);

			/* Inject fault */
			value = bit_map_get(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1);
			bit_map_set(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1, !value);
			evg_fault_errors++;

			break;
		}

		case evg_fault_reg:
		{
			struct evg_opencl_kernel_t *kernel = evg_gpu->ndrange->kernel;

			int work_group_id_in_compute_unit;
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;

			int num_registers_per_work_group;

			int work_item_id_in_compute_unit;
			int work_item_id_in_work_group;
			struct evg_work_item_t *work_item;

			struct linked_list_t *fetch_queue;
			struct evg_uop_t *inst_buffer;
			struct evg_uop_t *exec_buffer;
			struct heap_t *event_queue;
			struct evg_uop_t *uop;

			int lo_reg;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"reg\" reg=%d bit=%d ",
				evg_gpu->cycle, fault->compute_unit_id,
				fault->reg_id, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			num_registers_per_work_group =
				kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used
				* kernel->local_size;
			work_group_id_in_compute_unit = fault->reg_id / num_registers_per_work_group;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get affected entities */
			work_item_id_in_compute_unit = fault->reg_id
				/ kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;
			work_item_id_in_work_group = work_item_id_in_compute_unit % kernel->local_size;
			work_item = work_group->work_items[work_item_id_in_work_group];
			wavefront = work_item->wavefront;
			lo_reg = fault->reg_id
				% kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;

			/* Fault falling between Fetch and Read stage of an instruction
			 * consuming register. This case cannot be modeled due to functional
			 * simulation skew. */
			fetch_queue = compute_unit->alu_engine.fetch_queue;
			inst_buffer = compute_unit->alu_engine.inst_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_idep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_read\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_idep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_read\"");
				goto end_loop;
			}

			/* Fault falling between Fetch and Write stage of an instruction
			 * writing on the register. The instruction will overwrite the fault,
			 * so this shouldn't cause its injection. */
			exec_buffer = compute_unit->alu_engine.exec_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			uop = exec_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			event_queue = compute_unit->alu_engine.event_queue;
			for (heap_first(event_queue, (void **) &uop); uop;
				heap_next(event_queue, (void **) &uop))
			{
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}

			/* Fault caused error */
			evg_faults_debug("effect=\"error\" ");
			evg_faults_debug("wg=%d wf=%d wi=%d lo_reg=%d ",
				work_group->id, work_item->wavefront->id,
				work_item->id, lo_reg);

			/* Insert the fault */
			if (fault->bit < 32)
				work_item->gpr[lo_reg].elem[0] ^= 1 << fault->bit;
			else if (fault->bit < 64)
				work_item->gpr[lo_reg].elem[1] ^= 1 << (fault->bit - 32);
			else if (fault->bit < 96)
				work_item->gpr[lo_reg].elem[2] ^= 1 << (fault->bit - 64);
			else
				work_item->gpr[lo_reg].elem[3] ^= 1 << (fault->bit - 96);
			evg_fault_errors++;

			break;
		}

		case evg_fault_mem:
		{
			struct evg_work_group_t *work_group;

			int work_group_id_in_compute_unit;
			unsigned char value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"mem\" byte=%d bit=%d ",
				evg_gpu->cycle, fault->compute_unit_id,
				fault->byte, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Check if there is any local memory used at all */
			if (!evg_gpu->ndrange->local_mem_top)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			work_group_id_in_compute_unit = fault->byte / evg_gpu->ndrange->local_mem_top;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Inject fault */
			evg_faults_debug("effect=\"error\" wg=%d ", work_group->id);
			mem_read(work_group->local_mem, fault->byte, 1, &value);
			value ^= 1 << fault->bit;
			mem_write(work_group->local_mem, fault->byte, 1, &value);
			evg_fault_errors++;
			break;
		}

		default:
			panic("invalid fault type");
		}

end_loop:
		/* Extract and free */
		free(fault);
		linked_list_remove(evg_fault_list);
		evg_faults_debug("\n");

		/* If all faults were inserted and no error was caused, end simulation */
		if (!linked_list_count(evg_fault_list) && !evg_fault_errors)
			esim_finish = esim_finish_evg_no_faults;
	}
}
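/*
 * The queue-processing functions above all share one traversal idiom:
 * position the list's internal cursor at the head, then on every iteration
 * either consume the current element (remove it, which implicitly leaves the
 * cursor on its successor) or skip it with an explicit "next".  The sketch
 * below is a minimal, self-contained illustration of that idiom.  It does
 * NOT use the real linked_list_t API from the code above; the 'demo_list'
 * type and its helpers are hypothetical stand-ins written only for this
 * example, assuming the same cursor semantics as linked_list_head(),
 * linked_list_get(), linked_list_is_end(), linked_list_next() and
 * linked_list_remove().
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_node {
	int value;
	struct demo_node *next;
};

struct demo_list {
	struct demo_node *head;
	struct demo_node **cursor;  /* points to the link holding the current node */
};

/* Place cursor at the head (mirrors linked_list_head) */
static void demo_list_head(struct demo_list *list)
{
	list->cursor = &list->head;
}

/* Non-zero when the cursor has run past the last element (mirrors linked_list_is_end) */
static int demo_list_is_end(struct demo_list *list)
{
	return *list->cursor == NULL;
}

/* Current element, or NULL at the end (mirrors linked_list_get) */
static struct demo_node *demo_list_get(struct demo_list *list)
{
	return *list->cursor;
}

/* Advance cursor without removing (mirrors linked_list_next) */
static void demo_list_next(struct demo_list *list)
{
	if (*list->cursor)
		list->cursor = &(*list->cursor)->next;
}

/* Remove current element; the cursor then refers to its successor
 * (mirrors linked_list_remove and the X86ThreadRemoveFromLQ-style helpers) */
static void demo_list_remove(struct demo_list *list)
{
	struct demo_node *node = *list->cursor;
	if (node) {
		*list->cursor = node->next;
		free(node);
	}
}

static void demo_list_push_front(struct demo_list *list, int value)
{
	struct demo_node *node = malloc(sizeof *node);
	node->value = value;
	node->next = list->head;
	list->head = node;
}

int main(void)
{
	struct demo_list list = { NULL, NULL };
	int quantum = 2;  /* issue at most two "ready" elements, like the quantum above */

	for (int i = 1; i <= 5; i++)
		demo_list_push_front(&list, i);

	/* Same shape as the issue loops above: consume ready elements (odd
	 * values stand in for "ready" uops here), skip the rest. */
	demo_list_head(&list);
	while (!demo_list_is_end(&list) && quantum) {
		struct demo_node *node = demo_list_get(&list);
		if (node->value % 2 == 0) {
			demo_list_next(&list);  /* not ready: leave it in the queue */
			continue;
		}
		printf("issuing %d\n", node->value);
		demo_list_remove(&list);  /* issued: take it out of the queue */
		quantum--;
	}

	/* Free whatever is left in the queue */
	demo_list_head(&list);
	while (!demo_list_is_end(&list))
		demo_list_remove(&list);
	return 0;
}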