Example #1
void test_bit_map(void)
{
    /* Create a bit map; position 1000 lies beyond the initial size of
     * 100, so bit_map evidently reports 0 for out-of-range reads and
     * grows on writes. */
    struct bit_map *bm = bit_map_init(100);

    TEST_ASSERT_EQUAL(0, bit_map_get(bm, 1000));

    bit_map_set(bm, 1000);

    TEST_ASSERT_EQUAL(1, bit_map_get(bm, 1000));


    /* Record 500 positions: A[0] is the bit already set above; the rest
     * are random positions in [0, 10000), each set in the map. */
    uint32_t A[500];
    uint32_t i;

    A[0] = 1000;
    for (i = 1; i < 500; ++i) {
        A[i] = rand() % 10000;
        bit_map_set(bm, A[i]);
    }

    /* Sort the positions so the gaps between consecutive set bits can
     * be checked. */
    qsort(A, 500, sizeof(uint32_t), uint32_t_cmp);

    /* Every recorded position must read as 1, and every bit strictly
     * between two consecutive recorded positions must read as 0. */
    uint32_t j;
    for (i = 0; i < 499; ++i) {
        TEST_ASSERT_EQUAL(1, bit_map_get(bm, A[i]));
        for (j = A[i]+1; j < A[i+1]; ++j)
            TEST_ASSERT_EQUAL(0, bit_map_get(bm, j));
    }

    /* Round-trip the bit map through a file and verify the copy. */
    const char *file_name = "test_bit_map.out";

    FILE *f = fopen(file_name, "wb");

    bit_map_store(bm, f, file_name);

    fclose(f);

    f = fopen(file_name, "rb");
    struct bit_map *bm_r = bit_map_load(f, file_name);
    fclose(f);

    TEST_ASSERT_EQUAL(bm->num_bits, bm_r->num_bits);
    TEST_ASSERT_EQUAL(bm->num_ints, bm_r->num_ints);

    /* The reloaded map must contain exactly the same bits. */
    for (i = 0; i < 499; ++i) {
        TEST_ASSERT_EQUAL(1, bit_map_get(bm_r, A[i]));
        for (j = A[i]+1; j < A[i+1]; ++j)
            TEST_ASSERT_EQUAL(0, bit_map_get(bm_r, j));
    }


    bit_map_destroy(&bm);
    bit_map_destroy(&bm_r);
    remove(file_name);
}
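
Example #1 exercises a two-argument get/set interface, plus init, store, load, and destroy, whose implementation is not shown. Below is a minimal sketch consistent with the call sites, assuming bit_map_set grows the map on demand (the test sets bit 1000 on a map initialized for 100 bits, and reads beyond capacity report 0); bit_map_store and bit_map_load are omitted. This is an illustration of what such an API might look like, not the tested implementation.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct bit_map {
    uint32_t num_bits;  /* capacity in bits (multiple of 32) */
    uint32_t num_ints;  /* number of 32-bit words backing the map */
    uint32_t *ints;
};

struct bit_map *bit_map_init(uint32_t num_bits)
{
    struct bit_map *bm = calloc(1, sizeof(*bm));
    bm->num_ints = (num_bits + 31) / 32;
    bm->num_bits = bm->num_ints * 32;
    bm->ints = calloc(bm->num_ints, sizeof(uint32_t));
    return bm;
}

/* Reads outside the current capacity report 0, which lets the test query
 * bit 1000 on a map initialized for 100 bits before setting it. */
int bit_map_get(const struct bit_map *bm, uint32_t pos)
{
    if (pos >= bm->num_bits)
        return 0;
    return (bm->ints[pos / 32] >> (pos % 32)) & 1;
}

/* Assumed growth behavior: writes extend the backing array on demand. */
void bit_map_set(struct bit_map *bm, uint32_t pos)
{
    if (pos >= bm->num_bits) {
        uint32_t new_num_ints = pos / 32 + 1;
        bm->ints = realloc(bm->ints, new_num_ints * sizeof(uint32_t));
        memset(bm->ints + bm->num_ints, 0,
            (new_num_ints - bm->num_ints) * sizeof(uint32_t));
        bm->num_ints = new_num_ints;
        bm->num_bits = new_num_ints * 32;
    }
    bm->ints[pos / 32] |= (uint32_t)1 << (pos % 32);
}

void bit_map_destroy(struct bit_map **bm)
{
    free((*bm)->ints);
    free(*bm);
    *bm = NULL;
}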
Example #2
static void X86ContextDoCreate(X86Context *self, X86Emu *emu)
{
	int num_nodes;
	int i;
	
	/* Initialize */
	self->emu = emu;
	self->pid = emu->current_pid++;
	self->sched_policy = SCHED_RR;
	self->sched_priority = 1;  /* Lowest priority */

	/* Update state so that the context is inserted in the
	 * corresponding lists. The x86_ctx_running parameter has no
	 * effect, since it will be updated later. */
	X86ContextSetState(self, X86ContextRunning);
	DOUBLE_LINKED_LIST_INSERT_HEAD(emu, context, self);

	/* Structures */
	self->regs = x86_regs_create();
	self->backup_regs = x86_regs_create();
	self->signal_mask_table = x86_signal_mask_table_create();

	/* Thread affinity mask, used only for timing simulation. It is
	 * initialized to all 1's. */
	num_nodes = x86_cpu_num_cores * x86_cpu_num_threads;
	self->affinity = bit_map_create(num_nodes);
	for (i = 0; i < num_nodes; i++)
		bit_map_set(self->affinity, i, 1, 1);

	/* Initialize statically allocated instruction */
	new_static(&self->inst, X86Inst, emu->as);

	/* Virtual functions */
	asObject(self)->Dump = X86ContextDump;
}
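
Examples #2 through #6 call a different, four-argument bit_map_set(map, where, size, value) that writes size bits of value starting at bit position where (with a matching three-argument bit_map_get). Its implementation is not part of these excerpts; the following per-bit sketch matches the call sites, assuming bits are packed least-significant-first into 32-bit words, size is at most 32, and the field names of struct bit_map_t (opaque here) are purely illustrative.

/* Hypothetical layout; the real struct bit_map_t is opaque in these
 * examples, so these field names are assumptions. */
struct bit_map_t
{
	unsigned int size;    /* capacity in bits */
	unsigned int *data;   /* packed 32-bit words, LSB first */
};

/* Write 'size' bits of 'value' starting at bit 'where'. A per-bit loop
 * covers both single-bit calls such as bit_map_set(map, i, 1, 1) and the
 * multi-bit call in Example #4 that stores a whole active mask at once. */
void bit_map_set(struct bit_map_t *map, unsigned int where,
		unsigned int size, unsigned int value)
{
	unsigned int i;

	for (i = 0; i < size; i++)
	{
		unsigned int bit = where + i;

		if (value & (1u << i))
			map->data[bit / 32] |= 1u << (bit % 32);
		else
			map->data[bit / 32] &= ~(1u << (bit % 32));
	}
}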
Example #3
void si_work_item_set_pred(struct si_work_item_t *work_item, int pred)
{
	struct si_wavefront_t *wavefront = work_item->wavefront;

	assert(work_item->id_in_wavefront >= 0 && work_item->id_in_wavefront < wavefront->work_item_count);
	bit_map_set(wavefront->pred, work_item->id_in_wavefront, 1, !!pred);
	wavefront->pred_mask_update = 1;
}
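
Note the !!pred in Example #3: it collapses any nonzero predicate to exactly 1 before the value is stored in a one-bit field. Without it, a nonzero predicate whose low bit happens to be clear would be stored as 0. A self-contained illustration of the idiom (the values are arbitrary):

#include <stdio.h>

int main(void)
{
	int pred = 0x100;          /* nonzero, but its low bit is clear */

	printf("%d\n", pred & 1);  /* 0: truncating to one bit loses the predicate */
	printf("%d\n", !!pred);    /* 1: !! normalizes to 0 or 1 first */
	return 0;
}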
Example #4
void KplWarpCreate(KplWarp *self, int id, KplThreadBlock *thread_block,
		KplGrid *grid)
{
	KplEmu *emu = grid->emu;

	/* Initialization */
	self->id = id + thread_block->id * thread_block->warp_count;
	self->id_in_thread_block = id;
	self->grid = grid;
	self->thread_block = thread_block;

	/* Allocate threads */
	if (id < thread_block->warp_count - 1)
		self->thread_count = kpl_emu_warp_size;
	else
		self->thread_count = grid->thread_block_size -
				(thread_block->warp_count - 1) * kpl_emu_warp_size;
	self->threads = (KplThread **) xcalloc(self->thread_count,
			sizeof(KplThread *));

	/* Instruction */
	self->inst = KplInstWrapCreate(emu->as);
	self->inst_size = 8;
	self->inst_buffer = grid->function->inst_bin;
	self->inst_buffer_size = grid->function->inst_bin_size;

	/* Sync stack */
	self->sync_stack_top = 0;
	self->sync_stack.entries[self->sync_stack_top].active_thread_mask =
			bit_map_create(self->thread_count);
	bit_map_set(self->sync_stack.entries[self->sync_stack_top].active_thread_mask,
			0, self->thread_count,
			((unsigned long long)1 << self->thread_count) - 1);

	/* Reset flags */
	self->at_barrier = 0;
	self->finished_thread_count = 0;
	self->finished = 0;
}
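
The active-mask initialization in Example #4 relies on ((unsigned long long)1 << n) - 1 having exactly the n low bits set, which marks all thread_count threads as active. A small sketch of the idiom; the caveat about n >= 64 is an observation about C shift semantics, not something stated in the source (warp sizes here are small enough not to hit it):

#include <assert.h>
#include <stdio.h>

/* Value with the low 'n' bits set, valid for 0 <= n < 64; shifting a
 * 64-bit 1 by 64 or more positions is undefined behavior in C. */
static unsigned long long low_bits(int n)
{
	assert(n >= 0 && n < 64);
	return ((unsigned long long)1 << n) - 1;
}

int main(void)
{
	printf("%llx\n", low_bits(4));   /* f */
	printf("%llx\n", low_bits(32));  /* ffffffff */
	return 0;
}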
Example #5
void frm_grid_setup_threads(struct frm_grid_t *grid)
{
	struct frm_cuda_function_t *function = grid->function;

	struct frm_threadblock_t *threadblock;
	struct frm_warp_t *warp;
	struct frm_thread_t *thread;

	int bidx, bidy, bidz;  /* 3D threadblock ID iterators */
	int lidx, lidy, lidz;  /* 3D thread local ID iterators */

	int tid;  /* Global ID iterator */
	int bid;  /* Threadblock ID iterator */
	int wid;  /* Warp ID iterator */
	int lid;  /* Local ID iterator */

	/* Array of threadblocks */
	grid->threadblock_count = function->group_count;
	grid->threadblock_id_first = 0;
	grid->threadblock_id_last = grid->threadblock_count - 1;
	grid->threadblocks = calloc(grid->threadblock_count, sizeof(void *));
	for (bid = 0; bid < grid->threadblock_count; bid++)
		grid->threadblocks[bid] = frm_threadblock_create();
	
	/* Array of warps */
	grid->warps_per_threadblock = (function->local_size + frm_emu_warp_size - 1) / frm_emu_warp_size;
	grid->warp_count = grid->warps_per_threadblock * grid->threadblock_count;
	grid->warp_id_first = 0;
	grid->warp_id_last = grid->warp_count - 1;
	assert(grid->warps_per_threadblock > 0 && grid->warp_count > 0);
	grid->warps = calloc(grid->warp_count, sizeof(void *));
	for (wid = 0; wid < grid->warp_count; wid++)
	{
		bid = wid / grid->warps_per_threadblock;
		grid->warps[wid] = frm_warp_create();
		warp = grid->warps[wid];
		threadblock = grid->threadblocks[bid];

		warp->id = wid;
		warp->id_in_threadblock = wid % grid->warps_per_threadblock;
		warp->grid = grid;
		warp->threadblock = threadblock;
		DOUBLE_LINKED_LIST_INSERT_TAIL(threadblock, running, warp);
	}

	/* Array of threads */
	grid->thread_count = function->global_size;
	grid->thread_id_first = 0;
	grid->thread_id_last = grid->thread_count - 1;
	grid->threads = calloc(grid->thread_count, sizeof(void *));
	tid = 0;
	bid = 0;
	for (bidz = 0; bidz < function->group_count3[2]; bidz++)
	{
		for (bidy = 0; bidy < function->group_count3[1]; bidy++)
		{
			for (bidx = 0; bidx < function->group_count3[0]; bidx++)
			{
				/* Assign threadblock ID */
				threadblock = grid->threadblocks[bid];
				threadblock->grid = grid;
				threadblock->id_3d[0] = bidx;
				threadblock->id_3d[1] = bidy;
				threadblock->id_3d[2] = bidz;
				threadblock->id = bid;
				frm_threadblock_set_status(threadblock, frm_threadblock_pending);

				/* First, last, and number of threads in threadblock */
				threadblock->thread_id_first = tid;
				threadblock->thread_id_last = tid + function->local_size - 1;
				threadblock->thread_count = function->local_size;
				threadblock->threads = &grid->threads[tid];
				snprintf(threadblock->name, sizeof(threadblock->name), "threadblock[i%d-i%d]",
					threadblock->thread_id_first, threadblock->thread_id_last);

				/* First, last, and number of warps in threadblock */
				threadblock->warp_id_first = bid * grid->warps_per_threadblock;
				threadblock->warp_id_last = threadblock->warp_id_first + grid->warps_per_threadblock - 1;
				threadblock->warp_count = grid->warps_per_threadblock;
				threadblock->warps = &grid->warps[threadblock->warp_id_first];

				/* Iterate through threads */
				lid = 0;
				for (lidz = 0; lidz < function->local_size3[2]; lidz++)
				{
					for (lidy = 0; lidy < function->local_size3[1]; lidy++)
					{
						for (lidx = 0; lidx < function->local_size3[0]; lidx++)
						{
							/* Warp ID */
							wid = bid * grid->warps_per_threadblock +
								lid / frm_emu_warp_size;
							assert(wid < grid->warp_count);
							warp = grid->warps[wid];
							
							/* Create thread */
							grid->threads[tid] = frm_thread_create();
							thread = grid->threads[tid];
							thread->grid = grid;

							/* Global IDs */
							thread->id_3d[0] = bidx * function->local_size3[0] + lidx;
							thread->id_3d[1] = bidy * function->local_size3[1] + lidy;
							thread->id_3d[2] = bidz * function->local_size3[2] + lidz;
							thread->id = tid;

							/* Local IDs */
							thread->id_in_threadblock_3d[0] = lidx;
							thread->id_in_threadblock_3d[1] = lidy;
							thread->id_in_threadblock_3d[2] = lidz;
							thread->id_in_threadblock = lid;

							/* Other */
							thread->id_in_warp = thread->id_in_threadblock % frm_emu_warp_size;
							thread->threadblock = grid->threadblocks[bid];
							thread->warp = grid->warps[wid];

							/* First, last, and number of threads in warp */
							if (!warp->thread_count) {
								warp->thread_id_first = tid;
								warp->threads = &grid->threads[tid];
							}
							warp->thread_count++;
							warp->thread_id_last = tid;
							bit_map_set(warp->active_stack, thread->id_in_warp, 1, 1);

							/* Save local IDs in register R0 */
							thread->sr[FRM_SR_Tid_X].v.i = lidx;  /* R0.x */
							thread->sr[FRM_SR_Tid_Y].v.i = lidy;  /* R0.y */
							thread->sr[FRM_SR_Tid_Z].v.i = lidz;  /* R0.z */

							/* Save threadblock IDs in register R1 */
							thread->sr[FRM_SR_CTAid_X].v.i = bidx;  /* R1.x */
							thread->sr[FRM_SR_CTAid_Y].v.i = bidy;  /* R1.y */
							thread->sr[FRM_SR_CTAid_Z].v.i = bidz;  /* R1.z */

							/* Next thread */
							tid++;
							lid++;
						}
					}
				}

				/* Next threadblock */
				bid++;
			}
		}
	}

	/* Assign names to warps */
	for (wid = 0; wid < grid->warp_count; wid++)
	{
		warp = grid->warps[wid];
		snprintf(warp->name, sizeof(warp->name), "warp[i%d-i%d]",
			warp->thread_id_first, warp->thread_id_last);

		/* Initialize warp program counter */
		warp->buf_start = function->function_buffer.ptr;
		warp->buf = warp->buf_start;
		warp->buf_size = function->function_buffer.size;
	}

	/* Debug */
	printf("local_size = %d (%d,%d,%d)\n", function->local_size, function->local_size3[0],
		function->local_size3[1], function->local_size3[2]);
	printf("global_size = %d (%d,%d,%d)\n", function->global_size, function->global_size3[0],
		function->global_size3[1], function->global_size3[2]);
	printf("group_count = %d (%d,%d,%d)\n", function->group_count, function->group_count3[0],
		function->group_count3[1], function->group_count3[2]);
	printf("warp_count = %d\n", grid->warp_count);
	printf("warps_per_threadblock = %d\n", grid->warps_per_threadblock);
	printf(" tid tid2 tid1 tid0   bid bid2 bid1 bid0   lid lid2 lid1 lid0  warp            work-group\n");
	for (tid = 0; tid < grid->thread_count; tid++)
	{
		thread = grid->threads[tid];
		warp = thread->warp;
		threadblock = thread->threadblock;
		printf("%4d %4d %4d %4d  ", thread->id, thread->id_3d[2],
			thread->id_3d[1], thread->id_3d[0]);
		printf("%4d %4d %4d %4d  ", threadblock->id, threadblock->id_3d[2],
			threadblock->id_3d[1], threadblock->id_3d[0]);
		printf("%4d %4d %4d %4d  ", thread->id_in_threadblock, thread->id_in_threadblock_3d[2],
			thread->id_in_threadblock_3d[1], thread->id_in_threadblock_3d[0]);
		printf("%20s.%-4d  ", warp->name, thread->id_in_warp);
		printf("%20s.%-4d\n", threadblock->name, thread->id_in_threadblock);
	}

}
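
The loop nests in Example #5 enumerate 3D IDs with x varying fastest, then y, then z, so the flat counters (tid, lid, bid) follow a row-major linearization of the 3D coordinates. A self-contained check of that correspondence; flatten_3d is an illustrative helper, not part of the simulator:

#include <assert.h>

/* Flat index for x-fastest traversal: x varies quickest, then y, then z,
 * matching how 'lid' is incremented once per innermost iteration above. */
static int flatten_3d(int x, int y, int z, const int size3[3])
{
	return (z * size3[1] + y) * size3[0] + x;
}

int main(void)
{
	int size3[3] = {4, 2, 3};
	int lid = 0;
	int x, y, z;

	for (z = 0; z < size3[2]; z++)
		for (y = 0; y < size3[1]; y++)
			for (x = 0; x < size3[0]; x++)
				assert(flatten_3d(x, y, z, size3) == lid++);
	return 0;
}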
Example #6
void evg_faults_insert(void)
{
	struct evg_fault_t *fault;
	struct evg_compute_unit_t *compute_unit;

	for (;;)
	{
		linked_list_head(evg_fault_list);
		fault = linked_list_get(evg_fault_list);
		if (!fault || fault->cycle > evg_gpu->cycle)
			break;

		/* Insert fault depending on fault type */
		switch (fault->type)
		{

		case evg_fault_ams:
		{
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;
			struct evg_work_item_t *work_item;

			int work_group_id;  /* in compute unit */
			int wavefront_id;  /* in compute unit */
			int value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"ams\" stack=%d am=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id, fault->stack_id,
				fault->active_mask_id, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group and wavefront. If wavefront ID exceeds current number, dismiss */
			work_group_id = fault->stack_id / evg_gpu->ndrange->wavefronts_per_work_group;
			wavefront_id = fault->stack_id % evg_gpu->ndrange->wavefronts_per_work_group;
			if (work_group_id >= evg_gpu_max_work_groups_per_compute_unit
				|| !compute_unit->work_groups[work_group_id])
			{
				evg_faults_debug("effect=\"wf_idle\"");
				goto end_loop;
			}
			work_group = compute_unit->work_groups[work_group_id];
			wavefront = work_group->wavefronts[wavefront_id];

			/* If active_mask_id exceeds stack top, dismiss */
			if (fault->active_mask_id > wavefront->stack_top)
			{
				evg_faults_debug("effect=\"am_idle\"");
				goto end_loop;
			}

			/* If 'bit' exceeds number of work-items in wavefront, dismiss */
			if (fault->bit >= wavefront->work_item_count)
			{
				evg_faults_debug("effect=\"wi_idle\"");
				goto end_loop;
			}

			/* Fault caused an error, show affected software entities */
			work_item = wavefront->work_items[fault->bit];
			evg_faults_debug("effect=\"error\" wg=%d wf=%d wi=%d",
				work_group->id,
				wavefront->id,
				work_item->id);

			/* Inject fault */
			value = bit_map_get(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1);
			bit_map_set(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1, !value);
			evg_fault_errors++;

			break;
		}

		case evg_fault_reg:
		{
			struct evg_opencl_kernel_t *kernel = evg_gpu->ndrange->kernel;

			int work_group_id_in_compute_unit;
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;

			int num_registers_per_work_group;

			int work_item_id_in_compute_unit;
			int work_item_id_in_work_group;
			struct evg_work_item_t *work_item;

			struct linked_list_t *fetch_queue;
			struct evg_uop_t *inst_buffer;
			struct evg_uop_t *exec_buffer;
			struct heap_t *event_queue;
			struct evg_uop_t *uop;

			int lo_reg;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"reg\" reg=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id,
				fault->reg_id,
				fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			num_registers_per_work_group = kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used
				* kernel->local_size;
			work_group_id_in_compute_unit = fault->reg_id / num_registers_per_work_group;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get affected entities */
			work_item_id_in_compute_unit = fault->reg_id
				/ kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;
			work_item_id_in_work_group = work_item_id_in_compute_unit % kernel->local_size;
			work_item = work_group->work_items[work_item_id_in_work_group];
			wavefront = work_item->wavefront;
			lo_reg = fault->reg_id % kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;

			/* Fault falling between the Fetch and Read stages of an
			 * instruction consuming the register. This case cannot be
			 * modeled due to functional simulation skew. */
			fetch_queue = compute_unit->alu_engine.fetch_queue;
			inst_buffer = compute_unit->alu_engine.inst_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_idep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_read\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_idep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_read\"");
				goto end_loop;
			}

			/* Fault falling between the Fetch and Write stages of an
			 * instruction writing to the register. The instruction will
			 * overwrite the fault, so it should not be injected. */
			exec_buffer = compute_unit->alu_engine.exec_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			uop = exec_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			event_queue = compute_unit->alu_engine.event_queue;
			for (heap_first(event_queue, (void **) &uop); uop;
				heap_next(event_queue, (void **) &uop))
			{
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}

			/* Fault caused error */
			evg_faults_debug("effect=\"error\" ");
			evg_faults_debug("wg=%d wf=%d wi=%d lo_reg=%d ",
				work_group->id, work_item->wavefront->id, work_item->id, lo_reg);

			/* Insert the fault */
			if (fault->bit < 32)
				work_item->gpr[lo_reg].elem[0] ^= 1u << fault->bit;
			else if (fault->bit < 64)
				work_item->gpr[lo_reg].elem[1] ^= 1u << (fault->bit - 32);
			else if (fault->bit < 96)
				work_item->gpr[lo_reg].elem[2] ^= 1u << (fault->bit - 64);
			else
				work_item->gpr[lo_reg].elem[3] ^= 1u << (fault->bit - 96);
			evg_fault_errors++;

			break;

		}

		case evg_fault_mem:
		{
			struct evg_work_group_t *work_group;

			int work_group_id_in_compute_unit;
			unsigned char value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"mem\" byte=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id,
				fault->byte,
				fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Check if there is any local memory used at all */
			if (!evg_gpu->ndrange->local_mem_top)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			work_group_id_in_compute_unit = fault->byte / evg_gpu->ndrange->local_mem_top;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Inject fault */
			evg_faults_debug("effect=\"error\" wg=%d ",
				work_group->id);
			mem_read(work_group->local_mem, fault->byte, 1, &value);
			value ^= 1 << fault->bit;
			mem_write(work_group->local_mem, fault->byte, 1, &value);
			evg_fault_errors++;

			break;

		}

		default:
			panic("invalid fault type");

		}

end_loop:
		/* Extract and free */
		free(fault);
		linked_list_remove(evg_fault_list);
		evg_faults_debug("\n");

		/* If all faults were inserted and no error was caused, end simulation */
		if (!linked_list_count(evg_fault_list) && !evg_fault_errors)
			esim_finish = esim_finish_evg_no_faults;
	}
}
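
All three fault types in Example #6 reduce to the same read-modify-write bit flip: read the current state, invert a single bit, and write it back. For the active-mask case this is bit_map_get followed by bit_map_set with the negated value; for the register and memory cases it is an XOR with a one-bit mask. A self-contained illustration of the XOR flip (the values are arbitrary):

#include <stdio.h>

int main(void)
{
	unsigned int reg = 0x0000ff00;  /* stand-in for one register element */
	int bit = 9;

	reg ^= 1u << bit;  /* flip bit 9: 0x0000ff00 -> 0x0000fd00 */
	printf("%08x\n", reg);
	return 0;
}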