//{{{ void test_bit_map(void)
void test_bit_map(void)
{
	struct bit_map *bm = bit_map_init(100);

	/* Exercise an index well beyond the initial 100-bit size; unset
	 * bits must read as 0 */
	TEST_ASSERT_EQUAL(0, bit_map_get(bm, 1000));
	bit_map_set(bm, 1000);
	TEST_ASSERT_EQUAL(1, bit_map_get(bm, 1000));

	/* Record 499 random indices plus the one set above, setting each */
	uint32_t A[500];
	uint32_t i;
	A[0] = 1000;
	for (i = 1; i < 500; ++i) {
		A[i] = rand() % 10000;
		bit_map_set(bm, A[i]);
	}
	qsort(A, 500, sizeof(uint32_t), uint32_t_cmp);

	/* Every recorded index must be set; every index in the gap between
	 * consecutive recorded indices must be clear */
	uint32_t j;
	for (i = 0; i < 499; ++i) {
		TEST_ASSERT_EQUAL(1, bit_map_get(bm, A[i]));
		for (j = A[i] + 1; j < A[i + 1]; ++j)
			TEST_ASSERT_EQUAL(0, bit_map_get(bm, j));
	}
	/* The loop above stops one short; check the last index too */
	TEST_ASSERT_EQUAL(1, bit_map_get(bm, A[499]));

	/* Round-trip the map through a file */
	char *file_name = "test_bit_map.out";
	FILE *f = fopen(file_name, "wb");
	bit_map_store(bm, f, file_name);
	fclose(f);

	f = fopen(file_name, "rb");
	struct bit_map *bm_r = bit_map_load(f, file_name);
	fclose(f);

	/* Loaded map must match the original, bit for bit */
	TEST_ASSERT_EQUAL(bm->num_bits, bm_r->num_bits);
	TEST_ASSERT_EQUAL(bm->num_ints, bm_r->num_ints);
	for (i = 0; i < 499; ++i) {
		TEST_ASSERT_EQUAL(1, bit_map_get(bm_r, A[i]));
		for (j = A[i] + 1; j < A[i + 1]; ++j)
			TEST_ASSERT_EQUAL(0, bit_map_get(bm_r, j));
	}

	bit_map_destroy(&bm);
	bit_map_destroy(&bm_r);
	remove(file_name);
}
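/*
 * For reference: a minimal sketch of the two-argument bit_map flavor this
 * test exercises. The num_bits/num_ints fields come from the assertions
 * above; the 32-bit word representation, on-demand growth, and
 * out-of-range reads returning 0 are assumptions inferred from the test
 * (it creates a 100-bit map, then touches indices up to 9999). Only
 * set/get are sketched; init/store/load/destroy are left out.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct bit_map {
	uint32_t num_bits;  /* Capacity in bits */
	uint32_t num_ints;  /* Backing words: num_bits / 32 */
	uint32_t *ints;
};

/* Grow the backing array so that bit 'b' is addressable (assumed policy) */
static void bit_map_grow(struct bit_map *bm, uint32_t b)
{
	uint32_t num_ints;

	if (b < bm->num_bits)
		return;
	num_ints = b / 32 + 1;
	bm->ints = realloc(bm->ints, num_ints * sizeof(uint32_t));
	memset(bm->ints + bm->num_ints, 0,
			(num_ints - bm->num_ints) * sizeof(uint32_t));
	bm->num_ints = num_ints;
	bm->num_bits = num_ints * 32;
}

void bit_map_set(struct bit_map *bm, uint32_t b)
{
	bit_map_grow(bm, b);
	bm->ints[b / 32] |= 1u << (b % 32);
}

int bit_map_get(struct bit_map *bm, uint32_t b)
{
	if (b >= bm->num_bits)
		return 0;  /* Unallocated bits read as unset */
	return (bm->ints[b / 32] >> (b % 32)) & 1;
}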
static void X86ContextDoCreate(X86Context *self, X86Emu *emu)
{
	int num_nodes;
	int i;

	/* Initialize */
	self->emu = emu;
	self->pid = emu->current_pid++;
	self->sched_policy = SCHED_RR;
	self->sched_priority = 1;  /* Lowest priority */

	/* Update the state so that the context is inserted in the
	 * corresponding lists. The X86ContextRunning state has no effect
	 * yet, since it will be updated later. */
	X86ContextSetState(self, X86ContextRunning);
	DOUBLE_LINKED_LIST_INSERT_HEAD(emu, context, self);

	/* Structures */
	self->regs = x86_regs_create();
	self->backup_regs = x86_regs_create();
	self->signal_mask_table = x86_signal_mask_table_create();

	/* Thread affinity mask, used only for timing simulation. It is
	 * initialized to all 1's. */
	num_nodes = x86_cpu_num_cores * x86_cpu_num_threads;
	self->affinity = bit_map_create(num_nodes);
	for (i = 0; i < num_nodes; i++)
		bit_map_set(self->affinity, i, 1, 1);

	/* Initialize statically allocated instruction */
	new_static(&self->inst, X86Inst, emu->as);

	/* Virtual functions */
	asObject(self)->Dump = X86ContextDump;
}
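/*
 * Note the different bit_map flavor here: unlike the two-argument set/get
 * in the test above, these call sites use bit_map_set(map, where, size,
 * value) and bit_map_get(map, where, size), writing and reading a field
 * of 'size' bits at position 'where'. A minimal usage sketch of the
 * affinity pattern, assuming a bit-map.h header that declares
 * bit_map_create, bit_map_set, bit_map_get, and bit_map_free (names taken
 * from these call sites; the destructor name is assumed).
 */
#include <stdio.h>
#include "bit-map.h"  /* Assumed header for struct bit_map_t */

void affinity_example(void)
{
	/* Hypothetical core/thread counts, standing in for
	 * x86_cpu_num_cores and x86_cpu_num_threads */
	int num_cores = 4;
	int num_threads = 2;
	int num_nodes = num_cores * num_threads;
	struct bit_map_t *affinity;
	int i;

	/* Same pattern as above: allow execution on every node */
	affinity = bit_map_create(num_nodes);
	for (i = 0; i < num_nodes; i++)
		bit_map_set(affinity, i, 1, 1);

	/* A scheduler would later test one node at a time */
	for (i = 0; i < num_nodes; i++)
		printf("node %d: %s\n", i,
				bit_map_get(affinity, i, 1) ? "allowed" : "masked");

	bit_map_free(affinity);  /* Destructor name assumed */
}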
void si_work_item_set_pred(struct si_work_item_t *work_item, int pred)
{
	struct si_wavefront_t *wavefront = work_item->wavefront;

	assert(work_item->id_in_wavefront >= 0 &&
			work_item->id_in_wavefront < wavefront->work_item_count);
	bit_map_set(wavefront->pred, work_item->id_in_wavefront, 1, !!pred);
	wavefront->pred_mask_update = 1;
}
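/*
 * For symmetry, a sketch of the read side under the same bit_map_get
 * signature. This is hedged: the emulator's actual getter, if one
 * exists, may differ in name or behavior.
 */
int si_work_item_get_pred(struct si_work_item_t *work_item)
{
	struct si_wavefront_t *wavefront = work_item->wavefront;

	assert(work_item->id_in_wavefront >= 0 &&
			work_item->id_in_wavefront < wavefront->work_item_count);
	return bit_map_get(wavefront->pred, work_item->id_in_wavefront, 1);
}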
void KplWarpCreate(KplWarp *self, int id, KplThreadBlock *thread_block,
		KplGrid *grid)
{
	KplEmu *emu = grid->emu;

	/* Initialization */
	self->id = id + thread_block->id * thread_block->warp_count;
	self->id_in_thread_block = id;
	self->grid = grid;
	self->thread_block = thread_block;

	/* Allocate threads. Every warp but the last is full; the last warp
	 * takes the remaining threads of the thread block. */
	if (id < thread_block->warp_count - 1)
		self->thread_count = kpl_emu_warp_size;
	else
		self->thread_count = grid->thread_block_size -
				(thread_block->warp_count - 1) * kpl_emu_warp_size;
	self->threads = (KplThread **) xcalloc(self->thread_count,
			sizeof(KplThread *));

	/* Instruction */
	self->inst = KplInstWrapCreate(emu->as);
	self->inst_size = 8;
	self->inst_buffer = grid->function->inst_bin;
	self->inst_buffer_size = grid->function->inst_bin_size;

	/* Sync stack: mark all threads active in the bottom entry */
	self->sync_stack_top = 0;
	self->sync_stack.entries[self->sync_stack_top].active_thread_mask =
			bit_map_create(self->thread_count);
	bit_map_set(self->sync_stack.entries[self->sync_stack_top].
			active_thread_mask, 0, self->thread_count,
			((unsigned long long) 1 << self->thread_count) - 1);

	/* Reset flags */
	self->at_barrier = 0;
	self->finished_thread_count = 0;
	self->finished = 0;
}
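/*
 * The all-active mask above is ((unsigned long long) 1 << thread_count) - 1,
 * which is well defined only for thread_count < 64; warps of 32 threads
 * keep it safe here. A hypothetical helper that also covers a 64-wide
 * mask, shown as a sketch rather than a patch to the code above:
 */
#include <stdint.h>

/* Mask with the low 'n' bits set, valid for 0 <= n <= 64. Splitting out
 * n >= 64 avoids the undefined full-width shift. */
static uint64_t full_active_mask(unsigned n)
{
	if (n >= 64)
		return ~(uint64_t) 0;
	return ((uint64_t) 1 << n) - 1;
}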
void frm_grid_setup_threads(struct frm_grid_t *grid)
{
	struct frm_cuda_function_t *function = grid->function;

	struct frm_threadblock_t *threadblock;
	struct frm_warp_t *warp;
	struct frm_thread_t *thread;

	int bidx, bidy, bidz;  /* 3D threadblock ID iterators */
	int lidx, lidy, lidz;  /* 3D thread local ID iterators */

	int tid;  /* Global ID iterator */
	int bid;  /* Threadblock ID iterator */
	int wid;  /* Warp ID iterator */
	int lid;  /* Local ID iterator */

	/* Array of threadblocks */
	grid->threadblock_count = function->group_count;
	grid->threadblock_id_first = 0;
	grid->threadblock_id_last = grid->threadblock_count - 1;
	grid->threadblocks = calloc(grid->threadblock_count, sizeof(void *));
	for (bid = 0; bid < grid->threadblock_count; bid++)
		grid->threadblocks[bid] = frm_threadblock_create();

	/* Array of warps */
	grid->warps_per_threadblock = (function->local_size +
			frm_emu_warp_size - 1) / frm_emu_warp_size;
	grid->warp_count = grid->warps_per_threadblock *
			grid->threadblock_count;
	grid->warp_id_first = 0;
	grid->warp_id_last = grid->warp_count - 1;
	assert(grid->warps_per_threadblock > 0 && grid->warp_count > 0);
	grid->warps = calloc(grid->warp_count, sizeof(void *));
	for (wid = 0; wid < grid->warp_count; wid++)
	{
		bid = wid / grid->warps_per_threadblock;
		grid->warps[wid] = frm_warp_create();
		warp = grid->warps[wid];
		threadblock = grid->threadblocks[bid];

		warp->id = wid;
		warp->id_in_threadblock = wid % grid->warps_per_threadblock;
		warp->grid = grid;
		warp->threadblock = threadblock;
		DOUBLE_LINKED_LIST_INSERT_TAIL(threadblock, running, warp);
	}

	/* Array of threads */
	grid->thread_count = function->global_size;
	grid->thread_id_first = 0;
	grid->thread_id_last = grid->thread_count - 1;
	grid->threads = calloc(grid->thread_count, sizeof(void *));
	tid = 0;
	bid = 0;
	for (bidz = 0; bidz < function->group_count3[2]; bidz++)
	{
		for (bidy = 0; bidy < function->group_count3[1]; bidy++)
		{
			for (bidx = 0; bidx < function->group_count3[0]; bidx++)
			{
				/* Assign threadblock ID */
				threadblock = grid->threadblocks[bid];
				threadblock->grid = grid;
				threadblock->id_3d[0] = bidx;
				threadblock->id_3d[1] = bidy;
				threadblock->id_3d[2] = bidz;
				threadblock->id = bid;
				frm_threadblock_set_status(threadblock, frm_threadblock_pending);

				/* First, last, and number of threads in threadblock */
				threadblock->thread_id_first = tid;
				threadblock->thread_id_last = tid + function->local_size - 1;
				threadblock->thread_count = function->local_size;
				threadblock->threads = &grid->threads[tid];
				snprintf(threadblock->name, sizeof(threadblock->name),
						"threadblock[i%d-i%d]",
						threadblock->thread_id_first,
						threadblock->thread_id_last);

				/* First, last, and number of warps in threadblock */
				threadblock->warp_id_first = bid * grid->warps_per_threadblock;
				threadblock->warp_id_last = threadblock->warp_id_first +
						grid->warps_per_threadblock - 1;
				threadblock->warp_count = grid->warps_per_threadblock;
				threadblock->warps = &grid->warps[threadblock->warp_id_first];

				/* Iterate through threads */
				lid = 0;
				for (lidz = 0; lidz < function->local_size3[2]; lidz++)
				{
					for (lidy = 0; lidy < function->local_size3[1]; lidy++)
					{
						for (lidx = 0; lidx < function->local_size3[0]; lidx++)
						{
							/* Warp ID */
							wid = bid * grid->warps_per_threadblock +
									lid / frm_emu_warp_size;
							assert(wid < grid->warp_count);
							warp = grid->warps[wid];

							/* Create thread */
							grid->threads[tid] = frm_thread_create();
							thread = grid->threads[tid];
							thread->grid = grid;

							/* Global IDs */
							thread->id_3d[0] = bidx * function->local_size3[0] + lidx;
							thread->id_3d[1] = bidy * function->local_size3[1] + lidy;
							thread->id_3d[2] = bidz * function->local_size3[2] + lidz;
							thread->id = tid;

							/* Local IDs */
							thread->id_in_threadblock_3d[0] = lidx;
							thread->id_in_threadblock_3d[1] = lidy;
							thread->id_in_threadblock_3d[2] = lidz;
							thread->id_in_threadblock = lid;

							/* Other */
							thread->id_in_warp = thread->id_in_threadblock % frm_emu_warp_size;
							thread->threadblock = grid->threadblocks[bid];
							thread->warp = grid->warps[wid];

							/* First, last, and number of threads in warp */
							if (!warp->thread_count)
							{
								warp->thread_id_first = tid;
								warp->threads = &grid->threads[tid];
							}
							warp->thread_count++;
							warp->thread_id_last = tid;
							bit_map_set(warp->active_stack, thread->id_in_warp, 1, 1);

							/* Save local IDs in register R0 */
							thread->sr[FRM_SR_Tid_X].v.i = lidx;  /* R0.x */
							thread->sr[FRM_SR_Tid_Y].v.i = lidy;  /* R0.y */
							thread->sr[FRM_SR_Tid_Z].v.i = lidz;  /* R0.z */

							/* Save threadblock IDs in register R1 */
							thread->sr[FRM_SR_CTAid_X].v.i = bidx;  /* R1.x */
							thread->sr[FRM_SR_CTAid_Y].v.i = bidy;  /* R1.y */
							thread->sr[FRM_SR_CTAid_Z].v.i = bidz;  /* R1.z */

							/* Next thread */
							tid++;
							lid++;
						}
					}
				}

				/* Next threadblock */
				bid++;
			}
		}
	}

	/* Assign names to warps */
	for (wid = 0; wid < grid->warp_count; wid++)
	{
		warp = grid->warps[wid];
		snprintf(warp->name, sizeof(warp->name), "warp[i%d-i%d]",
				warp->thread_id_first, warp->thread_id_last);

		/* Initialize warp program counter */
		warp->buf_start = function->function_buffer.ptr;
		warp->buf = warp->buf_start;
		warp->buf_size = function->function_buffer.size;
	}

	/* Debug */
	printf("local_size = %d (%d,%d,%d)\n", function->local_size,
			function->local_size3[0], function->local_size3[1],
			function->local_size3[2]);
	printf("global_size = %d (%d,%d,%d)\n", function->global_size,
			function->global_size3[0], function->global_size3[1],
			function->global_size3[2]);
	printf("group_count = %d (%d,%d,%d)\n", function->group_count,
			function->group_count3[0], function->group_count3[1],
			function->group_count3[2]);
	printf("warp_count = %d\n", grid->warp_count);
	printf("warps_per_threadblock = %d\n", grid->warps_per_threadblock);
	printf(" tid tid2 tid1 tid0 bid bid2 bid1 bid0 lid lid2 lid1 lid0 warp threadblock\n");
	for (tid = 0; tid < grid->thread_count; tid++)
	{
		thread = grid->threads[tid];
		warp = thread->warp;
		threadblock = thread->threadblock;
		printf("%4d %4d %4d %4d ", thread->id, thread->id_3d[2],
				thread->id_3d[1], thread->id_3d[0]);
		printf("%4d %4d %4d %4d ", threadblock->id, threadblock->id_3d[2],
				threadblock->id_3d[1], threadblock->id_3d[0]);
		printf("%4d %4d %4d %4d ", thread->id_in_threadblock,
				thread->id_in_threadblock_3d[2],
				thread->id_in_threadblock_3d[1],
				thread->id_in_threadblock_3d[0]);
		printf("%20s.%-4d ", warp->name, thread->id_in_warp);
		printf("%20s.%-4d\n", threadblock->name, thread->id_in_threadblock);
	}
}
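/*
 * The triple-nested loops above assign flat thread IDs consecutively per
 * threadblock, so the structured IDs can be recovered arithmetically. A
 * small self-contained check of those relations; the function and
 * parameter names are illustrative, standing in for function->local_size
 * and frm_emu_warp_size.
 */
#include <assert.h>

static void check_id_decomposition(int tid, int local_size, int warp_size,
		int warps_per_threadblock)
{
	int bid = tid / local_size;  /* Threadblock ID */
	int lid = tid % local_size;  /* Local ID within the threadblock */
	int wid = bid * warps_per_threadblock + lid / warp_size;  /* Warp ID */
	int id_in_warp = lid % warp_size;

	/* The warp belongs to the thread's own threadblock */
	assert(wid / warps_per_threadblock == bid);

	/* Warp slot and position within the warp reassemble the local ID */
	assert((lid / warp_size) * warp_size + id_in_warp == lid);
}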
void evg_faults_insert(void)
{
	struct evg_fault_t *fault;
	struct evg_compute_unit_t *compute_unit;

	for (;;)
	{
		linked_list_head(evg_fault_list);
		fault = linked_list_get(evg_fault_list);
		if (!fault || fault->cycle > evg_gpu->cycle)
			break;

		/* Insert fault depending on fault type */
		switch (fault->type)
		{

		case evg_fault_ams:
		{
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;
			struct evg_work_item_t *work_item;

			int work_group_id;  /* In compute unit */
			int wavefront_id;  /* In compute unit */
			int value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"ams\" stack=%d am=%d bit=%d ",
					evg_gpu->cycle, fault->compute_unit_id,
					fault->stack_id, fault->active_mask_id,
					fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group and wavefront. If wavefront ID exceeds
			 * the current number, dismiss. */
			work_group_id = fault->stack_id /
					evg_gpu->ndrange->wavefronts_per_work_group;
			wavefront_id = fault->stack_id %
					evg_gpu->ndrange->wavefronts_per_work_group;
			if (work_group_id >= evg_gpu_max_work_groups_per_compute_unit
					|| !compute_unit->work_groups[work_group_id])
			{
				evg_faults_debug("effect=\"wf_idle\"");
				goto end_loop;
			}
			work_group = compute_unit->work_groups[work_group_id];
			wavefront = work_group->wavefronts[wavefront_id];

			/* If 'active_mask_id' exceeds the stack top, dismiss */
			if (fault->active_mask_id > wavefront->stack_top)
			{
				evg_faults_debug("effect=\"am_idle\"");
				goto end_loop;
			}

			/* If 'bit' exceeds the number of work-items in the
			 * wavefront, dismiss */
			if (fault->bit >= wavefront->work_item_count)
			{
				evg_faults_debug("effect=\"wi_idle\"");
				goto end_loop;
			}

			/* Fault caused an error; show affected software entities */
			work_item = wavefront->work_items[fault->bit];
			evg_faults_debug("effect=\"error\" wg=%d wf=%d wi=%d",
					work_group->id, wavefront->id, work_item->id);

			/* Inject fault: flip one bit of the selected active mask */
			value = bit_map_get(wavefront->active_stack,
					fault->active_mask_id * wavefront->work_item_count
					+ fault->bit, 1);
			bit_map_set(wavefront->active_stack,
					fault->active_mask_id * wavefront->work_item_count
					+ fault->bit, 1, !value);
			evg_fault_errors++;
			break;
		}

		case evg_fault_reg:
		{
			struct evg_opencl_kernel_t *kernel = evg_gpu->ndrange->kernel;

			int work_group_id_in_compute_unit;
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;

			int num_registers_per_work_group;

			int work_item_id_in_compute_unit;
			int work_item_id_in_work_group;
			struct evg_work_item_t *work_item;

			struct linked_list_t *fetch_queue;
			struct evg_uop_t *inst_buffer;
			struct evg_uop_t *exec_buffer;
			struct heap_t *event_queue;
			struct evg_uop_t *uop;

			int lo_reg;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"reg\" reg=%d bit=%d ",
					evg_gpu->cycle, fault->compute_unit_id,
					fault->reg_id, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			num_registers_per_work_group =
					kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used
					* kernel->local_size;
			work_group_id_in_compute_unit = fault->reg_id /
					num_registers_per_work_group;
			if (work_group_id_in_compute_unit >=
					evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get affected entities */
			work_item_id_in_compute_unit = fault->reg_id /
					kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;
			work_item_id_in_work_group = work_item_id_in_compute_unit %
					kernel->local_size;
			work_item = work_group->work_items[work_item_id_in_work_group];
			wavefront = work_item->wavefront;
			lo_reg = fault->reg_id %
					kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;

			/* Fault falling between the Fetch and Read stages of an
			 * instruction consuming the register. This case cannot be
			 * modeled due to functional simulation skew. */
			fetch_queue = compute_unit->alu_engine.fetch_queue;
			inst_buffer = compute_unit->alu_engine.inst_buffer;
			for (linked_list_head(fetch_queue);
					!linked_list_is_end(fetch_queue);
					linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_idep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_read\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_idep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_read\"");
				goto end_loop;
			}

			/* Fault falling between the Fetch and Write stages of an
			 * instruction writing to the register. The instruction will
			 * overwrite the fault, so it should not be injected. */
			exec_buffer = compute_unit->alu_engine.exec_buffer;
			for (linked_list_head(fetch_queue);
					!linked_list_is_end(fetch_queue);
					linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			uop = exec_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			event_queue = compute_unit->alu_engine.event_queue;
			for (heap_first(event_queue, (void **) &uop); uop;
					heap_next(event_queue, (void **) &uop))
			{
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}

			/* Fault caused an error */
			evg_faults_debug("effect=\"error\" ");
			evg_faults_debug("wg=%d wf=%d wi=%d lo_reg=%d ",
					work_group->id, work_item->wavefront->id,
					work_item->id, lo_reg);

			/* Insert the fault: flip one bit of the 128-bit register */
			if (fault->bit < 32)
				work_item->gpr[lo_reg].elem[0] ^= 1 << fault->bit;
			else if (fault->bit < 64)
				work_item->gpr[lo_reg].elem[1] ^= 1 << (fault->bit - 32);
			else if (fault->bit < 96)
				work_item->gpr[lo_reg].elem[2] ^= 1 << (fault->bit - 64);
			else
				work_item->gpr[lo_reg].elem[3] ^= 1 << (fault->bit - 96);
			evg_fault_errors++;
			break;
		}

		case evg_fault_mem:
		{
			struct evg_work_group_t *work_group;

			int work_group_id_in_compute_unit;
			unsigned char value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"mem\" byte=%d bit=%d ",
					evg_gpu->cycle, fault->compute_unit_id,
					fault->byte, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Check if any local memory is used at all */
			if (!evg_gpu->ndrange->local_mem_top)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			work_group_id_in_compute_unit = fault->byte /
					evg_gpu->ndrange->local_mem_top;
			if (work_group_id_in_compute_unit >=
					evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Inject fault: flip one bit of one local memory byte */
			evg_faults_debug("effect=\"error\" wg=%d ", work_group->id);
			mem_read(work_group->local_mem, fault->byte, 1, &value);
			value ^= 1 << fault->bit;
			mem_write(work_group->local_mem, fault->byte, 1, &value);
			evg_fault_errors++;
			break;
		}

		default:
			panic("invalid fault type");
		}

end_loop:
		/* Extract and free */
		free(fault);
		linked_list_remove(evg_fault_list);
		evg_faults_debug("\n");

		/* If all faults were inserted and no error was caused, end
		 * the simulation */
		if (!linked_list_count(evg_fault_list) && !evg_fault_errors)
			esim_finish = esim_finish_evg_no_faults;
	}
}
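/*
 * All three fault models converge on the same read-modify-write bit flip:
 * bit_map_set with the negated value for active masks, gpr ^= 1 << bit
 * for registers, and the mem_read / XOR / mem_write sequence for local
 * memory. A condensed, standalone illustration of the pattern:
 */
#include <stdio.h>

static unsigned flip_bit(unsigned word, int bit)
{
	return word ^ (1u << bit);
}

int main(void)
{
	unsigned reg = 0x0000ff00;

	/* Flipping the same bit twice restores the original value, which is
	 * why a single flip models a transient soft error */
	reg = flip_bit(reg, 3);
	printf("after flip:    0x%08x\n", reg);  /* 0x0000ff08 */
	reg = flip_bit(reg, 3);
	printf("after re-flip: 0x%08x\n", reg);  /* 0x0000ff00 */
	return 0;
}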