Example #1
static void
tree_output()
{
     Symbol **symbols, *main_sym;
     size_t i, num;
     cflow_depmap_t depmap;
     
     /* Collect functions and assign them ordinal numbers */
     num = collect_functions(&symbols);
     for (i = 0; i < num; i++)
	  symbols[i]->ord = i;
     
     /* Create a dependency matrix */
     depmap = depmap_alloc(num);
     for (i = 0; i < num; i++) {
	  if (symbols[i]->callee) {
	       struct linked_list_entry *p;
	       
	       for (p = linked_list_head(symbols[i]->callee); p;
		    p = p->next) {
		    Symbol *s = (Symbol*) p->data;
		    if (symbol_is_function(s))
			 depmap_set(depmap, i, s->ord);
	       }
	  }
     }
     
     depmap_tc(depmap);

     /* Mark recursive calls */
     for (i = 0; i < num; i++)
	  if (depmap_isset(depmap, i, i))
	       symbols[i]->recursive = 1;
     free(depmap);
     free(symbols);
     
     /* Collect and sort all symbols */
     num = collect_symbols(&symbols, is_var, 0);
     qsort(symbols, num, sizeof(*symbols), compare);
	       
     /* Produce output */
     begin();
    
     if (reverse_tree) {
	  for (i = 0; i < num; i++) {
	       inverted_tree(0, 0, symbols[i]);
	       separator();
	  }
     } else {
	  main_sym = lookup(start_name);
	  if (main_sym) {
	       direct_tree(0, 0, main_sym);
	       separator();
	  } else {
	       for (i = 0; i < num; i++) {
		    if (symbols[i]->callee == NULL)
			 continue;
		    direct_tree(0, 0, symbols[i]);
		    separator();
	       }
	  }
     }
     
     end();
     
     free(symbols);
}
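The recursion detection above builds a function-by-function dependency matrix, closes it transitively with depmap_tc(), and then marks any function whose diagonal entry is set as recursive. Below is a minimal self-contained sketch of the same idea, using a plain int matrix and Warshall's algorithm instead of cflow's depmap API; N, call and transitive_closure are illustrative names, not cflow symbols.

#include <stdio.h>

#define N 4

static void transitive_closure(int m[N][N])
{
     /* Warshall: after pass k, m[i][j] is set iff j is reachable from i
	using only intermediate nodes 0..k. */
     for (int k = 0; k < N; k++)
	  for (int i = 0; i < N; i++)
	       for (int j = 0; j < N; j++)
		    if (m[i][k] && m[k][j])
			 m[i][j] = 1;
}

int main(void)
{
     /* call[i][j] = 1 means function i calls function j directly.
	Here 0 -> 1 -> 2 -> 1, so 1 and 2 are mutually recursive. */
     int call[N][N] = {
	  {0, 1, 0, 0},
	  {0, 0, 1, 0},
	  {0, 1, 0, 0},
	  {0, 0, 0, 0},
     };

     transitive_closure(call);
     for (int i = 0; i < N; i++)
	  if (call[i][i])
	       printf("function %d is recursive\n", i);
     return 0;
}

After the closure, a set diagonal entry means the function can reach itself through some call chain, which is exactly what depmap_isset(depmap, i, i) tests above.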
Example #2
static int X86ThreadIssuePreQ(X86Thread *self, int quantum) {
  X86Core *core = self->core;
  X86Cpu *cpu = self->cpu;

  struct linked_list_t *preq = self->preq;
  struct x86_uop_t *prefetch;

  /* Process preq */
  linked_list_head(preq);
  while (!linked_list_is_end(preq) && quantum) {
    /* Get element from prefetch queue. If it is not ready, go to the
     * next one. */
    prefetch = linked_list_get(preq);
    if (!prefetch->ready && !X86ThreadIsUopReady(self, prefetch)) {
      linked_list_next(preq);
      continue;
    }

    /*
     * Make sure it has not been prefetched recently. This is just to avoid
     * unnecessary memory traffic. Even though the cache will still register a
     * "hit" on redundant prefetches, it is still helpful to avoid going to the
     * memory (cache).
     */
    if (prefetch_history_is_redundant(core->prefetch_history, self->data_mod,
                                      prefetch->phy_addr)) {
      /* remove from queue. do not prefetch. */
      assert(prefetch->uinst->opcode == x86_uinst_prefetch);
      X86ThreadRemovePreQ(self);
      prefetch->completed = 1;
      x86_uop_free_if_not_queued(prefetch);
      continue;
    }

    prefetch->ready = 1;

    /* Check that memory system is accessible */
    if (!mod_can_access(self->data_mod, prefetch->phy_addr)) {
      linked_list_next(preq);
      continue;
    }

    /* Remove from prefetch queue */
    assert(prefetch->uinst->opcode == x86_uinst_prefetch);
    X86ThreadRemovePreQ(self);

    /* Access memory system */
    mod_access(self->data_mod, mod_access_prefetch, prefetch->phy_addr, NULL,
               core->event_queue, prefetch, NULL);

    /* Record prefetched address */
    prefetch_history_record(core->prefetch_history, prefetch->phy_addr);

    /* The cache system will place the prefetch at the head of the
     * event queue when it is ready. For now, mark "in_event_queue" to
     * prevent the uop from being freed. */
    prefetch->in_event_queue = 1;
    prefetch->issued = 1;
    prefetch->issue_when = asTiming(cpu)->cycle;

    /* Statistics */
    core->num_issued_uinst_array[prefetch->uinst->opcode]++;
    core->lsq_reads++;
    core->reg_file_int_reads += prefetch->ph_int_idep_count;
    core->reg_file_fp_reads += prefetch->ph_fp_idep_count;
    self->num_issued_uinst_array[prefetch->uinst->opcode]++;
    self->lsq_reads++;
    self->reg_file_int_reads += prefetch->ph_int_idep_count;
    self->reg_file_fp_reads += prefetch->ph_fp_idep_count;
    cpu->num_issued_uinst_array[prefetch->uinst->opcode]++;
    if (prefetch->trace_cache) self->trace_cache->num_issued_uinst++;

    /* One more instruction issued, update quantum. */
    quantum--;

    /* MMU statistics */
    if (*mmu_report_file_name)
      mmu_access_page(prefetch->phy_addr, mmu_access_read);

    /* Trace */
    x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n", prefetch->id_in_core,
              core->id);
  }

  return quantum;
}
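The redundancy check above relies on an opaque prefetch_history object that remembers recently prefetched physical addresses. Its real implementation is not shown here; the following is only a sketch of the idea as a small circular buffer, with hypothetical names (prefetch_history_sketch, history_is_redundant, history_record) and an arbitrary size.

#define PREFETCH_HISTORY_SIZE 32

struct prefetch_history_sketch {
  unsigned int addr[PREFETCH_HISTORY_SIZE]; /* recently prefetched addresses */
  int head;                                 /* next slot to overwrite */
};

/* Return 1 if 'phy_addr' was prefetched recently and can be skipped. */
static int history_is_redundant(struct prefetch_history_sketch *h,
                                unsigned int phy_addr) {
  int i;

  for (i = 0; i < PREFETCH_HISTORY_SIZE; i++)
    if (h->addr[i] == phy_addr) return 1;
  return 0;
}

/* Record a prefetched address, overwriting the oldest entry. */
static void history_record(struct prefetch_history_sketch *h,
                           unsigned int phy_addr) {
  h->addr[h->head] = phy_addr;
  h->head = (h->head + 1) % PREFETCH_HISTORY_SIZE;
}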
Example #3
static int X86ThreadIssueIQ(X86Thread *self, int quant) {
  X86Cpu *cpu = self->cpu;
  X86Core *core = self->core;

  struct linked_list_t *iq = self->iq;
  struct x86_uop_t *uop;
  int lat;

  /* Find instruction to issue */
  linked_list_head(iq);
  while (!linked_list_is_end(iq) && quant) {
    /* Get element from IQ */
    uop = linked_list_get(iq);
    assert(x86_uop_exists(uop));
    assert(!(uop->flags & X86_UINST_MEM));
    if (!uop->ready && !X86ThreadIsUopReady(self, uop)) {
      linked_list_next(iq);
      continue;
    }
    uop->ready = 1; /* avoid next call to 'X86ThreadIsUopReady' */

    /* Run the instruction in its corresponding functional unit.
     * If the instruction does not require a functional unit,
     * 'X86CoreReserveFunctionalUnit' returns 1 cycle latency. If there is no
     * functional unit available, 'X86CoreReserveFunctionalUnit' returns 0. */
    lat = X86CoreReserveFunctionalUnit(core, uop);
    if (!lat) {
      linked_list_next(iq);
      continue;
    }

    /* Instruction was issued to the corresponding fu.
     * Remove it from IQ */
    X86ThreadRemoveFromIQ(self);

    /* Schedule inst in Event Queue */
    assert(!uop->in_event_queue);
    assert(lat > 0);
    uop->issued = 1;
    uop->issue_when = asTiming(cpu)->cycle;
    uop->when = asTiming(cpu)->cycle + lat;
    X86CoreInsertInEventQueue(core, uop);

    /* Statistics */
    core->num_issued_uinst_array[uop->uinst->opcode]++;
    core->iq_reads++;
    core->reg_file_int_reads += uop->ph_int_idep_count;
    core->reg_file_fp_reads += uop->ph_fp_idep_count;
    self->num_issued_uinst_array[uop->uinst->opcode]++;
    self->iq_reads++;
    self->reg_file_int_reads += uop->ph_int_idep_count;
    self->reg_file_fp_reads += uop->ph_fp_idep_count;
    cpu->num_issued_uinst_array[uop->uinst->opcode]++;
    if (uop->trace_cache) self->trace_cache->num_issued_uinst++;

    /* One more instruction issued, update quantum. */
    quant--;

    /* Trace */
    x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n", uop->id_in_core,
              core->id);
  }

  return quant;
}
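X86CoreReserveFunctionalUnit appears here only through its contract: it returns the instruction's latency when a suitable functional unit could be reserved (or when none is needed), and 0 when every suitable unit is busy. The sketch below illustrates that contract with an invented fu_table and made-up unit counts and latencies, not the simulator's real configuration.

enum fu_class { FU_INT_ALU, FU_INT_MULT, FU_FP_ADD, FU_CLASS_COUNT };

struct fu_entry {
  int count;               /* units of this class */
  int issue_lat;           /* cycles a unit stays reserved after issue */
  int op_lat;              /* latency reported back to the issue loop */
  long long busy_until[4]; /* cycle at which each unit becomes free */
};

static struct fu_entry fu_table[FU_CLASS_COUNT] = {
    [FU_INT_ALU] = {.count = 3, .issue_lat = 1, .op_lat = 1},
    [FU_INT_MULT] = {.count = 1, .issue_lat = 1, .op_lat = 3},
    [FU_FP_ADD] = {.count = 2, .issue_lat = 1, .op_lat = 4},
};

/* Return the operation latency, or 0 if no unit of class 'c' is free. */
static int fu_reserve(enum fu_class c, long long cycle) {
  struct fu_entry *e = &fu_table[c];
  int i;

  for (i = 0; i < e->count; i++) {
    if (e->busy_until[i] <= cycle) {
      e->busy_until[i] = cycle + e->issue_lat;
      return e->op_lat;
    }
  }
  return 0; /* all units busy; the caller retries next cycle */
}

With a table like this, the issue loop above simply leaves the uop in the IQ and retries it on a later cycle whenever the reservation call returns 0.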
Example #4
static int x86_cpu_issue_lq(int core, int thread, int quant)
{
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *load;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !x86_reg_file_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		x86_lq_remove(core, thread);

		/* Access memory system */
		mod_access(X86_THREAD.data_mod, mod_access_load,
			load->phy_addr, NULL, X86_CORE.event_queue, load);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = x86_cpu->cycle;
		
		/* Instruction issued */
		X86_CORE.issued[load->uinst->opcode]++;
		X86_CORE.lsq_reads++;
		X86_CORE.reg_file_int_reads += load->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += load->ph_fp_idep_count;
		X86_THREAD.issued[load->uinst->opcode]++;
		X86_THREAD.lsq_reads++;
		X86_THREAD.reg_file_int_reads += load->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += load->ph_fp_idep_count;
		x86_cpu->issued[load->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, load->core);
	}
	
	return quant;
}
Example #5
static int X86ThreadIssueLQ(X86Thread *self, int quant) {
  X86Core *core = self->core;
  X86Cpu *cpu = self->cpu;

  struct linked_list_t *lq = self->lq;
  struct x86_uop_t *load;
  struct mod_client_info_t *client_info;

  /* Process lq */
  linked_list_head(lq);
  while (!linked_list_is_end(lq) && quant) {
    /* Get element from load queue. If it is not ready, go to the next one */
    load = linked_list_get(lq);
    if (!load->ready && !X86ThreadIsUopReady(self, load)) {
      linked_list_next(lq);
      continue;
    }
    load->ready = 1;

    /* Check that memory system is accessible */
    if (!mod_can_access(self->data_mod, load->phy_addr)) {
      linked_list_next(lq);
      continue;
    }

    /* Remove from load queue */
    assert(load->uinst->opcode == x86_uinst_load);
    X86ThreadRemoveFromLQ(self);

    /* create and fill the mod_client_info_t object */
    client_info = mod_client_info_create(self->data_mod);
    client_info->prefetcher_eip = load->eip;

    /* Access memory system */
    mod_access(self->data_mod, mod_access_load, load->phy_addr, NULL,
               core->event_queue, load, client_info);

    /* The cache system will place the load at the head of the
     * event queue when it is ready. For now, mark "in_event_queue" to
     * prevent the uop from being freed. */
    load->in_event_queue = 1;
    load->issued = 1;
    load->issue_when = asTiming(cpu)->cycle;

    /* Statistics */
    core->num_issued_uinst_array[load->uinst->opcode]++;
    core->lsq_reads++;
    core->reg_file_int_reads += load->ph_int_idep_count;
    core->reg_file_fp_reads += load->ph_fp_idep_count;
    self->num_issued_uinst_array[load->uinst->opcode]++;
    self->lsq_reads++;
    self->reg_file_int_reads += load->ph_int_idep_count;
    self->reg_file_fp_reads += load->ph_fp_idep_count;
    cpu->num_issued_uinst_array[load->uinst->opcode]++;
    if (load->trace_cache) self->trace_cache->num_issued_uinst++;

    /* One more instruction issued, update quantum. */
    quant--;

    /* MMU statistics */
    if (*mmu_report_file_name) mmu_access_page(load->phy_addr, mmu_access_read);

    /* Trace */
    x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n", load->id_in_core,
              core->id);
  }

  return quant;
}
Example #6
static int x86_cpu_issue_iq(int core, int thread, int quant)
{
	struct linked_list_t *iq = X86_THREAD.iq;
	struct x86_uop_t *uop;
	int lat;

	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant)
	{
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(x86_uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !x86_reg_file_ready(uop))
		{
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'x86_reg_file_ready' */
		
		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit, 'x86_fu_reserve'
		 * returns 1 cycle latency. If there is no functional unit available,
		 * 'x86_fu_reserve' returns 0. */
		lat = x86_fu_reserve(uop);
		if (!lat)
		{
			linked_list_next(iq);
			continue;
		}
		
		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		x86_iq_remove(core, thread);
		
		/* Schedule inst in Event Queue */
		assert(!uop->in_event_queue);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = x86_cpu->cycle;
		uop->when = x86_cpu->cycle + lat;
		x86_event_queue_insert(X86_CORE.event_queue, uop);
		
		/* Instruction issued */
		X86_CORE.issued[uop->uinst->opcode]++;
		X86_CORE.iq_reads++;
		X86_CORE.reg_file_int_reads += uop->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += uop->ph_fp_idep_count;
		X86_THREAD.issued[uop->uinst->opcode]++;
		X86_THREAD.iq_reads++;
		X86_THREAD.reg_file_int_reads += uop->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += uop->ph_fp_idep_count;
		x86_cpu->issued[uop->uinst->opcode]++;
		quant--;

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			uop->id_in_core, uop->core);
	}
	
	return quant;
}
Example #7
void evg_isa_write_task_commit(struct evg_work_item_t *work_item)
{
	struct linked_list_t *task_list = work_item->write_task_list;
	struct evg_wavefront_t *wavefront = work_item->wavefront;
	struct evg_work_group_t *work_group = work_item->work_group;

	struct evg_isa_write_task_t *wt;
	struct evg_inst_t *inst;

	/* First, process tasks of type:
	 *  - EVG_ISA_WRITE_TASK_WRITE_DEST
	 *  - EVG_ISA_WRITE_TASK_WRITE_LDS
	 */
	for (linked_list_head(task_list); !linked_list_is_end(task_list); )
	{

		/* Get task */
		wt = linked_list_get(task_list);
		assert(wt->work_item == work_item);
		inst = wt->inst;

		switch (wt->kind)
		{
		
		case EVG_ISA_WRITE_TASK_WRITE_DEST:
		{
			if (wt->write_mask)
				evg_isa_write_gpr(work_item, wt->gpr, wt->rel, wt->chan, wt->value);
			work_item->pv.elem[wt->inst->alu] = wt->value;

			/* Debug */
			if (evg_isa_debugging())
			{
				evg_isa_debug("  i%d:%s", work_item->id,
					map_value(&evg_pv_map, wt->inst->alu));
				if (wt->write_mask)
				{
					evg_isa_debug(",");
					evg_inst_dump_gpr(wt->gpr, wt->rel, wt->chan, 0,
						debug_file(evg_isa_debug_category));
				}
				evg_isa_debug("<=");
				gpu_isa_dest_value_dump(inst, &wt->value,
					debug_file(evg_isa_debug_category));
			}

			break;
		}

		case EVG_ISA_WRITE_TASK_WRITE_LDS:
		{
			struct mem_t *local_mem;
			union evg_reg_t lds_value;

			local_mem = work_group->local_mem;
			assert(local_mem);
			assert(wt->lds_value_size);
			mem_write(local_mem, wt->lds_addr, wt->lds_value_size, &wt->lds_value);

			/* Debug */
			lds_value.as_uint = wt->lds_value;
			evg_isa_debug("  i%d:LDS[0x%x]<=(%u,%gf) (%d bytes)", work_item->id, wt->lds_addr,
				lds_value.as_uint, lds_value.as_float, (int) wt->lds_value_size);
			break;
		}

		default:
			linked_list_next(task_list);
			continue;
		}

		/* Done with this task */
		repos_free_object(evg_isa_write_task_repos, wt);
		linked_list_remove(task_list);
	}

	/* Process PUSH_BEFORE, SET_PRED */
	for (linked_list_head(task_list); !linked_list_is_end(task_list); )
	{
		/* Get task */
		wt = linked_list_get(task_list);
		inst = wt->inst;

		/* Process */
		switch (wt->kind)
		{

		case EVG_ISA_WRITE_TASK_PUSH_BEFORE:
		{
			if (!wavefront->push_before_done)
				evg_wavefront_stack_push(wavefront);
			wavefront->push_before_done = 1;
			break;
		}

		case EVG_ISA_WRITE_TASK_SET_PRED:
		{
			int update_pred = EVG_ALU_WORD1_OP2.update_pred;
			int update_exec_mask = EVG_ALU_WORD1_OP2.update_exec_mask;

			assert(inst->info->fmt[1] == EVG_FMT_ALU_WORD1_OP2);
			if (update_pred)
				evg_work_item_set_pred(work_item, wt->cond);
			if (update_exec_mask)
				evg_work_item_set_active(work_item, wt->cond);

			/* Debug */
			if (debug_status(evg_isa_debug_category))
			{
				if (update_pred && update_exec_mask)
					evg_isa_debug("  i%d:act/pred<=%d", work_item->id, wt->cond);
				else if (update_pred)
					evg_isa_debug("  i%d:pred=%d", work_item->id, wt->cond);
				else if (update_exec_mask)
					evg_isa_debug("  i%d:pred=%d", work_item->id, wt->cond);
			}
			break;
		}

		default:
			abort();
		}
		
		/* Done with task */
		repos_free_object(evg_isa_write_task_repos, wt);
		linked_list_remove(task_list);
	}

	/* List should be empty */
	assert(!linked_list_count(task_list));
}
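The LDS debug line above prints the written 32-bit value twice, once as an unsigned integer and once as a float, by viewing the same bits through the evg_reg_t union. A tiny standalone illustration of that trick follows; the union name and the value are hypothetical.

#include <stdio.h>

union reg_sketch
{
	unsigned int as_uint;
	float as_float;
};

int main(void)
{
	union reg_sketch r;

	r.as_uint = 0x3f800000;  /* bit pattern of 1.0f */
	printf("(%u,%gf)\n", r.as_uint, r.as_float);  /* prints (1065353216,1f) */
	return 0;
}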
Example #8
static void evg_tex_engine_fetch(EvgComputeUnit *compute_unit)
{
	EvgGpu *gpu = compute_unit->gpu;

	struct linked_list_t *pending_queue = compute_unit->tex_engine.pending_queue;
	struct linked_list_t *finished_queue = compute_unit->tex_engine.finished_queue;

	EvgWavefront *wavefront;
	struct evg_uop_t *cf_uop, *uop;
	struct evg_work_item_uop_t *work_item_uop;
	EvgInst *inst;
	int inst_num;

	EvgWorkItem *work_item;
	int work_item_id;

	char str[MAX_LONG_STRING_SIZE];
	char str_trimmed[MAX_LONG_STRING_SIZE];

	/* Get wavefront to fetch from */
	linked_list_head(pending_queue);
	cf_uop = linked_list_get(pending_queue);
	if (!cf_uop)
		return;
	wavefront = cf_uop->wavefront;
	assert(wavefront->clause_kind == EvgInstClauseTEX);


	/* If fetch queue is full, cannot fetch until space is made */
	if (compute_unit->tex_engine.fetch_queue_length >= evg_gpu_tex_engine_fetch_queue_size)
		return;
	
	/* Emulate instruction and create uop */
	inst_num = (wavefront->clause_buf - wavefront->clause_buf_start) / 16;
	EvgWavefrontExecute(wavefront);
	inst = wavefront->tex_inst;
	uop = evg_uop_create(compute_unit);
	uop->wavefront = wavefront;
	uop->work_group = wavefront->work_group;
	uop->cf_uop = cf_uop;
	uop->id_in_compute_unit = compute_unit->gpu_uop_id_counter++;
	uop->last = wavefront->clause_kind != EvgInstClauseTEX;
	uop->global_mem_read = wavefront->global_mem_read;
	uop->global_mem_write = wavefront->global_mem_write;
	uop->vliw_slots = 1;

	/* If TEX clause finished, extract CF uop from 'pending_queue' and
	 * insert it into 'finished_queue'. */
	if (uop->last)
	{
		linked_list_remove(pending_queue);
		linked_list_add(finished_queue, cf_uop);
	}

	/* If instruction is a global memory read (should be), record addresses */
	if (uop->global_mem_read)
	{
		assert((inst->info->flags & EvgInstFlagMemRead));
		EVG_FOREACH_WORK_ITEM_IN_WAVEFRONT(wavefront, work_item_id)
		{
			work_item = gpu->ndrange->work_items[work_item_id];
			work_item_uop = &uop->work_item_uop[work_item->id_in_wavefront];
			work_item_uop->global_mem_access_addr = work_item->global_mem_access_addr;
			work_item_uop->global_mem_access_size = work_item->global_mem_access_size;
		}
Example #9
void evg_faults_init(void)
{
	FILE *f;
	char line[MAX_STRING_SIZE];
	char *line_ptr;
	struct evg_fault_t *fault;
	int line_num;
	long long last_cycle;

	evg_fault_list = linked_list_create();
	if (!*evg_faults_file_name)
		return;

	f = fopen(evg_faults_file_name, "rt");
	if (!f)
		fatal("%s: cannot open file", evg_faults_file_name);
	
	line_num = 0;
	last_cycle = 0;
	while (!feof(f))
	{
		const char *delim = " ";
	
		/* Read a line */
		line_num++;
		line_ptr = fgets(line, MAX_STRING_SIZE, f);
		if (!line_ptr)
			break;

		/* Allocate new fault */
		fault = calloc(1, sizeof(struct evg_fault_t));
		if (!fault)
			fatal("%s: out of memory", __FUNCTION__);

		/* Read <cycle> field */
		line_ptr = strtok(line_ptr, delim);
		if (!line_ptr)
			goto wrong_format;
		fault->cycle = atoll(line_ptr);
		if (fault->cycle < 1)
			fatal("%s: line %d: lowest possible cycle is 1",
				evg_faults_file_name, line_num);
		if (fault->cycle < last_cycle)
			fatal("%s: line %d: cycles must be ordered",
				evg_faults_file_name, line_num);

		/* <fault> - Type of fault */
		line_ptr = strtok(NULL, delim);
		if (!line_ptr)
			goto wrong_format;
		if (!strcmp(line_ptr, "ams"))
			fault->type = evg_fault_ams;
		else if (!strcmp(line_ptr, "reg"))
			fault->type = evg_fault_reg;
		else if (!strcmp(line_ptr, "mem"))
			fault->type = evg_fault_mem;
		else
			fatal("%s: line %d: invalid value for <fault> ('%s')",
				evg_faults_file_name, line_num, line_ptr);

		/* <cu_id> - Compute unit */
		line_ptr = strtok(NULL, delim);
		if (!line_ptr)
			goto wrong_format;
		fault->compute_unit_id = atoi(line_ptr);
		if (fault->compute_unit_id >= evg_gpu_num_compute_units || fault->compute_unit_id < 0)
			fatal("%s: line %d: invalid compute unit ID",
				evg_faults_file_name, line_num);

		/* Analyze rest of the line depending on fault type */
		switch (fault->type)
		{

		case evg_fault_ams:

			/* <stack_id> - Stack ID */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->stack_id = atoi(line_ptr);
			if (fault->stack_id >= evg_gpu_max_wavefronts_per_compute_unit)
				fatal("%s: line %d: invalid stack ID",
					evg_faults_file_name, line_num);

			/* <am_id> - Active mask ID */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->active_mask_id = atoi(line_ptr);
			if (fault->active_mask_id >= EVG_MAX_STACK_SIZE)
				fatal("%s: line %d: invalid active mask ID",
					evg_faults_file_name, line_num);

			/* <bit> */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->bit = atoi(line_ptr);
			if (fault->bit >= evg_emu_wavefront_size)
				fatal("%s: line %d: invalid bit index",
					evg_faults_file_name, line_num);

			/* No more tokens */
			if (strtok(NULL, delim))
				fatal("%s: line %d: too many arguments",
					evg_faults_file_name, line_num);

			break;

		case evg_fault_reg:

			/* <reg_id> - Register ID */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->reg_id = atoi(line_ptr);
			if (fault->reg_id >= evg_gpu_num_registers || fault->reg_id < 0)
				fatal("%s: line %d: invalid compute unit ID",
					evg_faults_file_name, line_num);

			/* <bit> */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->bit = atoi(line_ptr);
			if (fault->bit < 0 || fault->bit >= 128)
				fatal("%s: line %d: invalid bit index",
					evg_faults_file_name, line_num);

			break;

		case evg_fault_mem:

			/* <byte> - Byte position in local memory */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->byte = atoi(line_ptr);
			if (fault->byte >= evg_gpu_local_mem_size || fault->byte < 0)
				fatal("%s: line %d: invalid byte position",
					evg_faults_file_name, line_num);

			/* <bit> - Bit position */
			line_ptr = strtok(NULL, delim);
			if (!line_ptr)
				goto wrong_format;
			fault->bit = atoi(line_ptr);
			if (fault->bit > 7 || fault->bit < 0)
				fatal("%s: line %d: invalid bit position",
					evg_faults_file_name, line_num);

			break;
		}

		/* Insert fault in fault list */
		linked_list_out(evg_fault_list);
		linked_list_insert(evg_fault_list, fault);
		last_cycle = fault->cycle;
		continue;

wrong_format:
		fatal("%s: line %d: not enough arguments",
			evg_faults_file_name, line_num);
	}
	fclose(f);
	linked_list_head(evg_fault_list);
}
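From the parser above, each line of the faults file consists of space-separated fields: <cycle> <fault type> <cu_id>, followed by <stack_id> <am_id> <bit> for "ams" faults, <reg_id> <bit> for "reg" faults, and <byte> <bit> for "mem" faults. Cycles must be at least 1 and non-decreasing. A hypothetical file accepted by this parser could look like the following (assuming the simulated GPU's compute-unit count, register count, wavefront size and local-memory size allow these values):

100 ams 3 5 2 17
250 reg 1 64 31
400 mem 0 1024 6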
Example #10
void evg_faults_insert(void)
{
	struct evg_fault_t *fault;
	struct evg_compute_unit_t *compute_unit;

	for (;;)
	{
		linked_list_head(evg_fault_list);
		fault = linked_list_get(evg_fault_list);
		if (!fault || fault->cycle > evg_gpu->cycle)
			break;

		/* Insert fault depending on fault type */
		switch (fault->type)
		{

		case evg_fault_ams:
		{
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;
			struct evg_work_item_t *work_item;

			int work_group_id;  /* in compute unit */
			int wavefront_id;  /* in compute unit */
			int value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"ams\" stack=%d am=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id, fault->stack_id,
				fault->active_mask_id, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group and wavefront. If wavefront ID exceeds current number, dismiss */
			work_group_id = fault->stack_id / evg_gpu->ndrange->wavefronts_per_work_group;
			wavefront_id = fault->stack_id % evg_gpu->ndrange->wavefronts_per_work_group;
			if (work_group_id >= evg_gpu_max_work_groups_per_compute_unit
				|| !compute_unit->work_groups[work_group_id])
			{
				evg_faults_debug("effect=\"wf_idle\"");
				goto end_loop;
			}
			work_group = compute_unit->work_groups[work_group_id];
			wavefront = work_group->wavefronts[wavefront_id];

			/* If active_mask_id exceeds stack top, dismiss */
			if (fault->active_mask_id > wavefront->stack_top)
			{
				evg_faults_debug("effect=\"am_idle\"");
				goto end_loop;
			}

			/* If 'bit' exceeds number of work-items in wavefront, dismiss */
			if (fault->bit >= wavefront->work_item_count)
			{
				evg_faults_debug("effect=\"wi_idle\"");
				goto end_loop;
			}

			/* Fault caused an error, show affected software entities */
			work_item = wavefront->work_items[fault->bit];
			evg_faults_debug("effect=\"error\" wg=%d wf=%d wi=%d",
				work_group->id,
				wavefront->id,
				work_item->id);

			/* Inject fault */
			value = bit_map_get(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1);
			bit_map_set(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1, !value);
			evg_fault_errors++;

			break;
		}

		case evg_fault_reg:
		{
			struct evg_opencl_kernel_t *kernel = evg_gpu->ndrange->kernel;

			int work_group_id_in_compute_unit;
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;

			int num_registers_per_work_group;

			int work_item_id_in_compute_unit;
			int work_item_id_in_work_group;
			struct evg_work_item_t *work_item;

			struct linked_list_t *fetch_queue;
			struct evg_uop_t *inst_buffer;
			struct evg_uop_t *exec_buffer;
			struct heap_t *event_queue;
			struct evg_uop_t *uop;

			int lo_reg;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"reg\" reg=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id,
				fault->reg_id,
				fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			num_registers_per_work_group = kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used
				* kernel->local_size;
			work_group_id_in_compute_unit = fault->reg_id / num_registers_per_work_group;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get affected entities */
			work_item_id_in_compute_unit = fault->reg_id
				/ kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;
			work_item_id_in_work_group = work_item_id_in_compute_unit % kernel->local_size;
			work_item = work_group->work_items[work_item_id_in_work_group];
			wavefront = work_item->wavefront;
			lo_reg = fault->reg_id % kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;

			/* Fault falling between Fetch and Read stage of an instruction
			 * consuming register. This case cannot be modeled due to functional
			 * simulation skew. */
			fetch_queue = compute_unit->alu_engine.fetch_queue;
			inst_buffer = compute_unit->alu_engine.inst_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_idep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_read\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_idep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_read\"");
				goto end_loop;
			}

			/* Fault falling between Fetch and Write stage of an instruction
			 * writing on the register. The instruction will overwrite the fault,
			 * so this shouldn't cause its injection. */
			exec_buffer = compute_unit->alu_engine.exec_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			uop = exec_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			event_queue = compute_unit->alu_engine.event_queue;
			for (heap_first(event_queue, (void **) &uop); uop;
				heap_next(event_queue, (void **) &uop))
			{
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}

			/* Fault caused error */
			evg_faults_debug("effect=\"error\" ");
			evg_faults_debug("wg=%d wf=%d wi=%d lo_reg=%d ",
				work_group->id, work_item->wavefront->id, work_item->id, lo_reg);

			/* Insert the fault */
			if (fault->bit < 32)
				work_item->gpr[lo_reg].elem[0] ^= 1 << fault->bit;
			else if (fault->bit < 64)
				work_item->gpr[lo_reg].elem[1] ^= 1 << (fault->bit - 32);
			else if (fault->bit < 96)
				work_item->gpr[lo_reg].elem[2] ^= 1 << (fault->bit - 64);
			else
				work_item->gpr[lo_reg].elem[3] ^= 1 << (fault->bit - 96);
			evg_fault_errors++;

			break;

		}

		case evg_fault_mem:
		{
			struct evg_work_group_t *work_group;

			int work_group_id_in_compute_unit;
			unsigned char value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"mem\" byte=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id,
				fault->byte,
				fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Check if there is any local memory used at all */
			if (!evg_gpu->ndrange->local_mem_top)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			work_group_id_in_compute_unit = fault->byte / evg_gpu->ndrange->local_mem_top;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Inject fault */
			evg_faults_debug("effect=\"error\" wg=%d ",
				work_group->id);
			mem_read(work_group->local_mem, fault->byte, 1, &value);
			value ^= 1 << fault->bit;
			mem_write(work_group->local_mem, fault->byte, 1, &value);
			evg_fault_errors++;

			break;

		}

		default:
			panic("invalid fault type");

		}

end_loop:
		/* Extract and free */
		linked_list_remove(evg_fault_list);
		free(fault);
		evg_faults_debug("\n");

		/* If all faults were inserted and no error was caused, end simulation */
		if (!linked_list_count(evg_fault_list) && !evg_fault_errors)
			esim_finish = esim_finish_evg_no_faults;
	}
}
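The register fault above flips a single bit of a 128-bit GPR stored as four 32-bit elements, selecting the element with an if/else ladder. A compact equivalent of that ladder, as a sketch assuming 0 <= bit < 128 and the same elem[4] layout:

static void flip_gpr_bit(unsigned int elem[4], int bit)
{
	/* Pick the 32-bit element, then the bit inside it. */
	elem[bit / 32] ^= 1u << (bit % 32);
}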