Example #1
static void prefetcher_do_prefetch(struct mod_t *mod, struct mod_stack_t *stack,
                                   unsigned int prefetch_addr) {
    int set1, tag1, set2, tag2;

    assert(prefetch_addr > 0);

    /* Predicted prefetch_addr can go horribly wrong
     * sometimes. Since prefetches aren't supposed to
     * cause any kind of faults/exceptions, return. */
    if (!mod_serves_address(mod, prefetch_addr)) {
        mem_debug("  miss_addr 0x%x, prefetch_addr 0x%x, %s : illegal prefetch\n",
                  stack->addr, prefetch_addr, mod->name);
        return;
    }

    cache_decode_address(mod->cache, stack->addr, &set1, &tag1, NULL);
    cache_decode_address(mod->cache, prefetch_addr, &set2, &tag2, NULL);

    /* If the prefetch_addr is in the same block as the missed address
     * there is no point in prefetching. One scenario where this may
     * happen is when we see a stride smaller than block size because
     * of an eviction between the two accesses. */
    if (set1 == set2 && tag1 == tag2) return;

    /* I'm not passing back the mod_client_info structure. If this needs to be
     * passed in the future, make sure a copy is made (since the one that is
     * pointed to by stack->client_info may be freed early). */
    mem_debug("  miss_addr 0x%x, prefetch_addr 0x%x, %s : prefetcher\n",
              stack->addr, prefetch_addr, mod->name);

    mod_access(mod, mod_access_prefetch, prefetch_addr, NULL, NULL, NULL, NULL);
}
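The same-block check above hinges on decoding an address into a set and a tag. Below is a minimal, self-contained sketch of that arithmetic; the geometry (64-byte blocks, 256 sets) and helper names are assumptions for illustration, not Multi2Sim's cache_decode_address.

#include <assert.h>
#include <stdio.h>

#define BLOCK_SIZE 64   /* bytes per cache block (assumed) */
#define NUM_SETS   256  /* number of sets (assumed) */

static void decode_address(unsigned int addr, int *set, int *tag)
{
    *set = (addr / BLOCK_SIZE) % NUM_SETS;
    *tag = addr / (BLOCK_SIZE * NUM_SETS);
}

int main(void)
{
    int set1, tag1, set2, tag2;

    /* A stride smaller than the block size lands in the same block... */
    decode_address(0x1000, &set1, &tag1);
    decode_address(0x1010, &set2, &tag2);
    assert(set1 == set2 && tag1 == tag2);   /* prefetch would be skipped */

    /* ...while a block-sized stride reaches a new block. */
    decode_address(0x1000 + BLOCK_SIZE, &set2, &tag2);
    printf("same block: %d\n", set1 == set2 && tag1 == tag2);   /* prints 0 */
    return 0;
}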
Example #2
static int X86ThreadIssueSQ(X86Thread *self, int quantum) {
  X86Cpu *cpu = self->cpu;
  X86Core *core = self->core;

  struct x86_uop_t *store;
  struct linked_list_t *sq = self->sq;
  struct mod_client_info_t *client_info;

  /* Process SQ */
  linked_list_head(sq);
  while (!linked_list_is_end(sq) && quantum) {
    /* Get store */
    store = linked_list_get(sq);
    assert(store->uinst->opcode == x86_uinst_store);

    /* Only committed stores issue */
    if (store->in_rob) break;

    /* Check that memory system entry is ready */
    if (!mod_can_access(self->data_mod, store->phy_addr)) break;

    /* Remove store from store queue */
    X86ThreadRemoveFromSQ(self);

    /* create and fill the mod_client_info_t object */
    client_info = mod_client_info_create(self->data_mod);
    client_info->prefetcher_eip = store->eip;

    /* Issue store */
    mod_access(self->data_mod, mod_access_store, store->phy_addr, NULL,
               core->event_queue, store, client_info);

    /* The cache system will place the store at the head of the
     * event queue when it is ready. For now, mark "in_event_queue" to
     * prevent the uop from being freed. */
    store->in_event_queue = 1;
    store->issued = 1;
    store->issue_when = asTiming(cpu)->cycle;

    /* Statistics */
    core->num_issued_uinst_array[store->uinst->opcode]++;
    core->lsq_reads++;
    core->reg_file_int_reads += store->ph_int_idep_count;
    core->reg_file_fp_reads += store->ph_fp_idep_count;
    self->num_issued_uinst_array[store->uinst->opcode]++;
    self->lsq_reads++;
    self->reg_file_int_reads += store->ph_int_idep_count;
    self->reg_file_fp_reads += store->ph_fp_idep_count;
    cpu->num_issued_uinst_array[store->uinst->opcode]++;
    if (store->trace_cache) self->trace_cache->num_issued_uinst++;

    /* One more instruction, update quantum. */
    quantum--;

    /* MMU statistics */
    if (*mmu_report_file_name)
      mmu_access_page(store->phy_addr, mmu_access_write);
  }
  return quantum;
}
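The function above follows a control-flow pattern that recurs throughout these examples: drain a queue head-first, spending one unit of issue quantum per instruction and stopping at the first entry that cannot issue this cycle. A toy, self-contained sketch of that pattern (not Multi2Sim code; all names here are illustrative):

#include <stdio.h>

struct entry { int ready; };

static int issue_queue(struct entry *q, int count, int quantum)
{
    for (int i = 0; i < count && quantum; i++) {
        if (!q[i].ready)
            break;          /* stores issue in order: stop at first blocker */
        /* ...issue q[i] to the memory system here... */
        quantum--;          /* one more instruction issued this cycle */
    }
    return quantum;         /* leftover quantum for the caller */
}

int main(void)
{
    struct entry q[4] = { {1}, {1}, {0}, {1} };
    printf("remaining quantum: %d\n", issue_queue(q, 4, 3));   /* prints 1 */
    return 0;
}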
Example #3
static int issue_sq(int core, int thread, int quant)
{
	struct uop_t *store;
	struct linked_list_t *sq = THREAD.sq;

	/* Process SQ */
	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quant)
	{
		/* Get store */
		store = linked_list_get(sq);
		assert(store->uinst->opcode == x86_uinst_store);

		/* Only committed stores issue */
		if (store->in_rob)
			break;

		/* Check that memory system entry is ready */
		if (!mod_can_access(THREAD.data_mod, store->phy_addr))
			break;

		/* Remove store from store queue */
		sq_remove(core, thread);

		/* Issue store */
		mod_access(THREAD.data_mod, mod_entry_cpu, mod_access_write,
			store->phy_addr, NULL, CORE.eventq, store);

		/* The cache system will place the store at the head of the
		 * event queue when it is ready. For now, mark "in_eventq" to
		 * prevent the uop from being freed. */
		store->in_eventq = 1;
		store->issued = 1;
		store->issue_when = cpu->cycle;
	
		/* Instruction issued */
		CORE.issued[store->uinst->opcode]++;
		CORE.lsq_reads++;
		CORE.rf_int_reads += store->ph_int_idep_count;
		CORE.rf_fp_reads += store->ph_fp_idep_count;
		THREAD.issued[store->uinst->opcode]++;
		THREAD.lsq_reads++;
		THREAD.rf_int_reads += store->ph_int_idep_count;
		THREAD.rf_fp_reads += store->ph_fp_idep_count;
		cpu->issued[store->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);

		/* Debug */
		esim_debug("uop action=\"update\", core=%d, seq=%llu,"
			" stg_issue=1, in_lsq=0, issued=1\n",
			store->core, (long long unsigned) store->di_seq);
	}
	return quant;
}
Example #4
static int x86_cpu_issue_sq(int core, int thread, int quant)
{
	struct x86_uop_t *store;
	struct linked_list_t *sq = X86_THREAD.sq;

	/* Process SQ */
	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quant)
	{
		/* Get store */
		store = linked_list_get(sq);
		assert(store->uinst->opcode == x86_uinst_store);

		/* Only committed stores issue */
		if (store->in_rob)
			break;

		/* Check that memory system entry is ready */
		if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
			break;

		/* Remove store from store queue */
		x86_sq_remove(core, thread);

		/* Issue store */
		mod_access(X86_THREAD.data_mod, mod_access_store,
			store->phy_addr, NULL, X86_CORE.event_queue, store);

		/* The cache system will place the store at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		store->in_event_queue = 1;
		store->issued = 1;
		store->issue_when = x86_cpu->cycle;
	
		/* Instruction issued */
		X86_CORE.issued[store->uinst->opcode]++;
		X86_CORE.lsq_reads++;
		X86_CORE.reg_file_int_reads += store->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += store->ph_fp_idep_count;
		X86_THREAD.issued[store->uinst->opcode]++;
		X86_THREAD.lsq_reads++;
		X86_THREAD.reg_file_int_reads += store->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += store->ph_fp_idep_count;
		x86_cpu->issued[store->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);
	}
	return quant;
}
Example #5
void frm_lds_mem(struct frm_lds_t *lds) {
  struct frm_uop_t *uop;
  struct frm_thread_uop_t *thread_uop;
  struct frm_thread_t *thread;
  int thread_id;
  int instructions_processed = 0;
  int list_entries;
  int i, j;
  enum mod_access_kind_t access_type;
  int list_index = 0;

  list_entries = list_count(lds->read_buffer);

  /* Sanity check the read buffer */
  assert(list_entries <= frm_gpu_lds_read_buffer_size);

  for (i = 0; i < list_entries; i++) {
    uop = list_get(lds->read_buffer, list_index);
    assert(uop);

    instructions_processed++;

    /* Uop is not ready yet */
    if (asTiming(frm_gpu)->cycle < uop->read_ready) {
      list_index++;
      continue;
    }

    /* Stall if the width has been reached. */
    if (instructions_processed > frm_gpu_lds_width) {
      frm_trace(
          "si.inst id=%lld cu=%d wf=%d uop_id=%lld "
          "stg=\"s\"\n",
          uop->id_in_sm, lds->sm->id, uop->warp->id, uop->id_in_warp);
      list_index++;
      continue;
    }

    /* Sanity check uop */
    assert(uop->lds_read || uop->lds_write);

    /* Sanity check mem buffer */
    assert(list_count(lds->mem_buffer) <=
           frm_gpu_lds_max_inflight_mem_accesses);

    /* Stall if there is no room in the memory buffer */
    if (list_count(lds->mem_buffer) == frm_gpu_lds_max_inflight_mem_accesses) {
      frm_trace(
          "si.inst id=%lld cu=%d wf=%d uop_id=%lld "
          "stg=\"s\"\n",
          uop->id_in_sm, lds->sm->id, uop->warp->id, uop->id_in_warp);
      list_index++;
      continue;
    }

    /* Access local memory */
    for (thread_id = uop->warp->threads[0]->id_in_warp;
         thread_id < uop->warp->thread_count; thread_id++) {
      thread = uop->warp->threads[thread_id];
      thread_uop = &uop->thread_uop[thread->id_in_warp];

      for (j = 0; j < thread_uop->lds_access_count; j++) {
        if (thread->lds_access_type[j] == 1) {
          access_type = mod_access_load;
        } else if (thread->lds_access_type[j] == 2) {
          access_type = mod_access_store;
        } else {
          fatal("%s: invalid lds access type", __FUNCTION__);
        }

        mod_access(lds->sm->lds_module, access_type,
                   thread_uop->lds_access_addr[j], &uop->lds_witness, NULL,
                   NULL, NULL);
        uop->lds_witness--;
      }
    }

    /* Increment outstanding memory access count */
    uop->warp_inst_queue_entry->lgkm_cnt++;

    /* Transfer the uop to the mem buffer */
    list_remove(lds->read_buffer, uop);
    list_enqueue(lds->mem_buffer, uop);

    frm_trace(
        "si.inst id=%lld cu=%d wf=%d uop_id=%lld "
        "stg=\"lds-m\"\n",
        uop->id_in_sm, lds->sm->id, uop->warp->id, uop->id_in_warp);
  }
}
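The lds_witness counter above implements a completion-counting idiom: the issuer decrements the witness once per access it launches, and (by assumption here) the memory system increments it as each access completes, so the uop is finished when the witness returns to zero. A self-contained sketch of that idiom:

#include <stdio.h>

/* Stand-in for the completion callback the memory system would invoke */
static void access_complete(int *witness) { (*witness)++; }

int main(void)
{
    int witness = 0;
    int accesses = 3;

    /* Issue phase: one decrement per outstanding access */
    for (int i = 0; i < accesses; i++)
        witness--;              /* witness == -accesses while in flight */

    /* Completion phase (normally driven asynchronously by the memory system) */
    for (int i = 0; i < accesses; i++)
        access_complete(&witness);

    printf("all complete: %s\n", witness == 0 ? "yes" : "no");
    return 0;
}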
Example #6
static int issue_lq(int core, int thread, int quant)
{
	struct linked_list_t *lq = THREAD.lq;
	struct uop_t *load;

	/* Debug */
	if (esim_debug_file)
		uop_lnlist_check_if_ready(lq);
	
	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !rf_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		lq_remove(core, thread);

		/* Access memory system */
		mod_access(THREAD.data_mod, mod_entry_cpu, mod_access_read,
			load->phy_addr, NULL, CORE.eventq, load);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_eventq" to
		 * prevent the uop from being freed. */
		load->in_eventq = 1;
		load->issued = 1;
		load->issue_when = cpu->cycle;
		
		/* Instruction issued */
		CORE.issued[load->uinst->opcode]++;
		CORE.lsq_reads++;
		CORE.rf_int_reads += load->ph_int_idep_count;
		CORE.rf_fp_reads += load->ph_fp_idep_count;
		THREAD.issued[load->uinst->opcode]++;
		THREAD.lsq_reads++;
		THREAD.rf_int_reads += load->ph_int_idep_count;
		THREAD.rf_fp_reads += load->ph_fp_idep_count;
		cpu->issued[load->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Debug */
		esim_debug("uop action=\"update\", core=%d, seq=%llu,"
			" stg_issue=1, in_lsq=0, issued=1\n",
			load->core, (long long unsigned) load->di_seq);
	}
	
	return quant;
}
Example #7
/* Event handler for EV_MEM_SYSTEM_COMMAND.
 * The event data is a string of type 'char *' that needs to be deallocated
 * after processing this event. */
void mem_system_command_handler(int event, void *data)
{
	struct list_t *token_list;

	char *command_line = data;
	char command[MAX_STRING_SIZE];

	/* Get command */
	str_token(command, sizeof command, command_line, 0, " ");
	if (!command[0])
		fatal("%s: invalid command syntax.\n\t> %s",
			__FUNCTION__, command_line);

	/* Commands that need to be processed at the end of the simulation
	 * are ignored here. These are commands prefixed with 'Check'. */
	if (!strncasecmp(command, "Check", 5))
	{
		esim_schedule_end_event(EV_MEM_SYSTEM_END_COMMAND, data);
		return;
	}

	/* Split command in tokens, skip command */
	token_list = str_token_list_create(command_line, " ");
	assert(list_count(token_list));
	str_token_list_shift(token_list);

	/* Command 'SetBlock' */
	if (!strcasecmp(command, "SetBlock"))
	{
		struct mod_t *mod;

		int set;
		int way;
		int tag;

		int set_check;
		int tag_check;

		int state;

		mod = mem_system_command_get_mod(token_list, command_line);
		mem_system_command_get_set_way(token_list, command_line, mod, &set, &way);
		tag = mem_system_command_get_hex(token_list, command_line);
		state = mem_system_command_get_state(token_list, command_line);
		mem_system_command_end(token_list, command_line);

		/* Check that module serves address */
		if (!mod_serves_address(mod, tag))
			fatal("%s: %s: module does not serve address 0x%x.\n\t> %s",
				__FUNCTION__, mod->name, tag, command_line);

		/* Check that tag goes to specified set */
		mod_find_block(mod, tag, &set_check, NULL, &tag_check, NULL);
		if (set != set_check)
			fatal("%s: %s: tag 0x%x belongs to set %d.\n\t> %s",
				__FUNCTION__, mod->name, tag, set_check, command_line);
		if (tag != tag_check)
			fatal("%s: %s: tag should be multiple of block size.\n\t> %s",
				__FUNCTION__, mod->name, command_line);

		/* Set tag */
		cache_set_block(mod->cache, set, way, tag, state);
	}

	/* Command 'SetOwner' */
	else if (!strcasecmp(command, "SetOwner"))
	{
		struct mod_t *mod;
		struct mod_t *owner;

		int set;
		int way;

		int sub_block;
		int owner_index;

		/* Get fields */
		mod = mem_system_command_get_mod(token_list, command_line);
		mem_system_command_get_set_way(token_list, command_line, mod, &set, &way);
		sub_block = mem_system_command_get_sub_block(token_list, command_line, mod, set, way);
		owner = mem_system_command_get_mod(token_list, command_line);
		mem_system_command_end(token_list, command_line);

		/* Check that owner is an immediate higher-level module */
		if (owner)
		{
			if (owner->low_net != mod->high_net || !owner->low_net)
				fatal("%s: %s is not a higher-level module of %s.\n\t> %s",
					__FUNCTION__, owner->name, mod->name, command_line);
		}

		/* Set owner */
		owner_index = owner ? owner->low_net_node->index : -1;
		dir_entry_set_owner(mod->dir, set, way, sub_block, owner_index);
	}

	/* Command 'SetSharers' */
	else if (!strcasecmp(command, "SetSharers"))
	{
		struct mod_t *mod;
		struct mod_t *sharer;

		int set;
		int way;

		int sub_block;

		/* Get first fields */
		mod = mem_system_command_get_mod(token_list, command_line);
		mem_system_command_get_set_way(token_list, command_line, mod, &set, &way);
		sub_block = mem_system_command_get_sub_block(token_list, command_line, mod, set, way);

		/* Get sharers */
		mem_system_command_expect(token_list, command_line);
		dir_entry_clear_all_sharers(mod->dir, set, way, sub_block);
		while (list_count(token_list))
		{
			/* Get sharer */
			sharer = mem_system_command_get_mod(token_list, command_line);
			if (!sharer)
				continue;

			/* Check that sharer is an immediate higher-level module */
			if (sharer->low_net != mod->high_net || !sharer->low_net)
				fatal("%s: %s is not a higher-level module of %s.\n\t> %s",
					__FUNCTION__, sharer->name, mod->name, command_line);

			/* Set sharer */
			dir_entry_set_sharer(mod->dir, set, way, sub_block, sharer->low_net_node->index);
		}
	}

	/* Command 'Access' */
	else if (!strcasecmp(command, "Access"))
	{
		struct mod_t *mod;
		enum mod_access_kind_t access_kind;
		unsigned int addr;
		long long cycle;

		/* Read fields */
		mod = mem_system_command_get_mod(token_list, command_line);
		cycle = mem_system_command_get_cycle(token_list, command_line);
		access_kind = mem_system_command_get_mod_access(token_list, command_line);
		addr = mem_system_command_get_hex(token_list, command_line);

		/* If command is scheduled for later, exit */
		if (cycle > esim_cycle)
		{
			str_token_list_free(token_list);
			esim_schedule_event(EV_MEM_SYSTEM_COMMAND, data, cycle - esim_cycle);
			return;
		}

		/* Access module */
		mod_access(mod, access_kind, addr, NULL, NULL, NULL, NULL);
	}

	/* Command not supported */
	else
		fatal("%s: %s: invalid command.\n\t> %s",
			__FUNCTION__, command, command_line);

	/* Free command */
	free(command_line);
	str_token_list_free(token_list);
}
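The handler above has a tokenize-then-dispatch shape: peel off the first token, branch on it case-insensitively, then parse the remaining fields. A standalone sketch using only standard C, where strtok and strcasecmp stand in for the str_token_* helpers and the commands and messages are illustrative:

#include <stdio.h>
#include <string.h>
#include <strings.h>   /* strcasecmp */

static void handle_command(const char *line)
{
    char buf[256];
    snprintf(buf, sizeof buf, "%s", line);   /* strtok modifies its input */

    char *command = strtok(buf, " ");
    if (!command) {
        fprintf(stderr, "invalid command syntax\n");
        return;
    }
    if (!strcasecmp(command, "SetBlock"))
        printf("SetBlock: remaining fields come from strtok(NULL, \" \")\n");
    else if (!strcasecmp(command, "Access"))
        printf("Access: module, cycle, kind, address\n");
    else
        fprintf(stderr, "invalid command: %s\n", command);
}

int main(void)
{
    handle_command("SetBlock mod-l2-0 2 1 0x1000 E");
    handle_command("Access mod-l1-0 10 LOAD 0x400");
    return 0;
}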
Example #8
static int X86ThreadIssuePreQ(X86Thread *self, int quantum)
{
	X86Core *core = self->core;
	X86Cpu *cpu = self->cpu;

	struct linked_list_t *preq = self->preq;
	struct x86_uop_t *prefetch;

	/* Process preq */
	linked_list_head(preq);
	while (!linked_list_is_end(preq) && quantum)
	{
		/* Get element from prefetch queue. If it is not ready, go to the next one */
		prefetch = linked_list_get(preq);
		if (!prefetch->ready && !X86ThreadIsUopReady(self, prefetch))
		{
			linked_list_next(preq);
			continue;
		}

		/* 
		 * Make sure it has not been prefetched recently. This is just to avoid
		 * unnecessary memory traffic. Even though the cache will register a "hit"
		 * on redundant prefetches, it is still helpful to avoid going to the
		 * memory (cache). 
		 */
		if (prefetch_history_is_redundant(core->prefetch_history,
							   self->data_mod, prefetch->phy_addr))
		{
			/* remove from queue. do not prefetch. */
			assert(prefetch->uinst->opcode == x86_uinst_prefetch);
			X86ThreadRemovePreQ(self);
			prefetch->completed = 1;
			x86_uop_free_if_not_queued(prefetch);
			continue;
		}

		prefetch->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(self->data_mod, prefetch->phy_addr))
		{
			linked_list_next(preq);
			continue;
		}

		/* Remove from prefetch queue */
		assert(prefetch->uinst->opcode == x86_uinst_prefetch);
		X86ThreadRemovePreQ(self);

		/* Access memory system */
		mod_access(self->data_mod, mod_access_prefetch,
			prefetch->phy_addr, NULL, core->event_queue, prefetch, NULL);

		/* Record prefetched address */
		prefetch_history_record(core->prefetch_history, prefetch->phy_addr);

		/* The cache system will place the prefetch at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		prefetch->in_event_queue = 1;
		prefetch->issued = 1;
		prefetch->issue_when = asTiming(cpu)->cycle;
		
		/* Statistics */
		core->num_issued_uinst_array[prefetch->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += prefetch->ph_int_idep_count;
		core->reg_file_fp_reads += prefetch->ph_fp_idep_count;
		self->num_issued_uinst_array[prefetch->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += prefetch->ph_int_idep_count;
		self->reg_file_fp_reads += prefetch->ph_fp_idep_count;
		cpu->num_issued_uinst_array[prefetch->uinst->opcode]++;
		if (prefetch->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quantum--;
		
		/* MMU statistics */
		MMUAccessPage(cpu->mmu, prefetch->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			prefetch->id_in_core, core->id);
	}
	
	return quantum;
}
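The prefetch_history_is_redundant / prefetch_history_record pair above filters out recently issued prefetches. Its internals are not shown in this listing, so the following is a hypothetical sketch of one plausible design: a small ring buffer of recent block-aligned addresses.

#include <stdio.h>

#define HISTORY_SIZE 16
#define BLOCK_MASK   (~(unsigned int) 63)   /* 64-byte blocks (assumed) */

struct prefetch_history {
    unsigned int blocks[HISTORY_SIZE];
    int next;
};

static int history_is_redundant(struct prefetch_history *h, unsigned int addr)
{
    for (int i = 0; i < HISTORY_SIZE; i++)
        if (h->blocks[i] == (addr & BLOCK_MASK))
            return 1;
    return 0;
}

static void history_record(struct prefetch_history *h, unsigned int addr)
{
    h->blocks[h->next] = addr & BLOCK_MASK;     /* overwrite oldest entry */
    h->next = (h->next + 1) % HISTORY_SIZE;
}

int main(void)
{
    struct prefetch_history h = { {0}, 0 };
    history_record(&h, 0x1234);
    printf("%d %d\n", history_is_redundant(&h, 0x1200),    /* 1: same block */
                      history_is_redundant(&h, 0x2000));   /* 0: not seen */
    return 0;
}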
Example #9
static int X86ThreadIssueLQ(X86Thread *self, int quant)
{
	X86Core *core = self->core;
	X86Cpu *cpu = self->cpu;

	struct linked_list_t *lq = self->lq;
	struct x86_uop_t *load;
	struct mod_client_info_t *client_info;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !X86ThreadIsUopReady(self, load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(self->data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		X86ThreadRemoveFromLQ(self);

		/* create and fill the mod_client_info_t object */
		client_info = mod_client_info_create(self->data_mod);
		client_info->prefetcher_eip = load->eip;

		/* Access memory system */
		mod_access(self->data_mod, mod_access_load,
			load->phy_addr, NULL, core->event_queue, load, client_info);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = asTiming(cpu)->cycle;
		
		/* Statistics */
		core->num_issued_uinst_array[load->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += load->ph_int_idep_count;
		core->reg_file_fp_reads += load->ph_fp_idep_count;
		self->num_issued_uinst_array[load->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += load->ph_int_idep_count;
		self->reg_file_fp_reads += load->ph_fp_idep_count;
		cpu->num_issued_uinst_array[load->uinst->opcode]++;
		if (load->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quant--;
		
		/* MMU statistics */
		MMUAccessPage(cpu->mmu, load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, core->id);
	}
	
	return quant;
}
Example #10
void frm_vector_mem_mem(struct frm_vector_mem_unit_t *vector_mem)
{
	struct frm_uop_t *uop;
	struct frm_thread_uop_t *thread_uop;
	struct frm_thread_t *thread;
	int thread_id;
	int instructions_processed = 0;
	int list_entries;
	int i;
	enum mod_access_kind_t access_kind;
	int list_index = 0;

	list_entries = list_count(vector_mem->read_buffer);
	
	/* Sanity check the read buffer */
	assert(list_entries <= frm_gpu_vector_mem_read_buffer_size);

	for (i = 0; i < list_entries; i++)
	{
		uop = list_get(vector_mem->read_buffer, list_index);
		assert(uop);

		instructions_processed++;

		/* Uop is not ready yet */
		if (asTiming(frm_gpu)->cycle < uop->read_ready)
		{
			list_index++;
			continue;
		}

		/* Stall if the width has been reached. */
		if (instructions_processed > frm_gpu_vector_mem_width)
		{
			frm_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
				"stg=\"s\"\n", uop->id_in_sm, 
				vector_mem->sm->id, 
				uop->warp->id, uop->id_in_warp);
			list_index++;
			continue;
		}

		/* Sanity check mem buffer */
		assert(list_count(vector_mem->mem_buffer) <= 
			frm_gpu_vector_mem_max_inflight_mem_accesses);

		/* Stall if there is no room in the memory buffer */
		if (list_count(vector_mem->mem_buffer) == 
			frm_gpu_vector_mem_max_inflight_mem_accesses)
		{
			frm_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
				"stg=\"s\"\n", uop->id_in_sm, 
				vector_mem->sm->id, 
				uop->warp->id, uop->id_in_warp);
			list_index++;
			continue;
		}

		/* Set the access type */
		if (uop->vector_mem_write && !uop->glc)
			access_kind = mod_access_nc_store;
		else if (uop->vector_mem_write && uop->glc)
			access_kind = mod_access_store;
		else if (uop->vector_mem_read)
			access_kind = mod_access_load;
		else 
			fatal("%s: invalid access kind", __FUNCTION__);

		/* Access global memory */
		assert(!uop->global_mem_witness);
		for (thread_id = uop->warp->threads[0]->id_in_warp; 
				thread_id < uop->warp->thread_count; 
				thread_id++)
		{
			thread = uop->warp->threads[thread_id];
			thread_uop = 
				&uop->thread_uop[thread->id_in_warp];

			mod_access(vector_mem->sm->global_memory, 
				access_kind, 
				thread_uop->global_mem_access_addr,
				&uop->global_mem_witness, NULL, NULL, NULL);
			uop->global_mem_witness--;
		}

		if (frm_spatial_report_active)
		{
			if (uop->vector_mem_write)
			{
				uop->num_global_mem_write += 
					uop->global_mem_witness;
				frm_report_global_mem_inflight(uop->sm,
						uop->num_global_mem_write);
			}
			else if (uop->vector_mem_read)
			{
				uop->num_global_mem_read += 
					uop->global_mem_witness;
				frm_report_global_mem_inflight(uop->sm,
						uop->num_global_mem_read);
			}
			else
				fatal("%s: invalid access kind", __FUNCTION__);
		}

		/* Increment outstanding memory access count */
		uop->warp_inst_queue_entry->lgkm_cnt++;

		/* Transfer the uop to the mem buffer */
		list_remove(vector_mem->read_buffer, uop);
		list_enqueue(vector_mem->mem_buffer, uop);

		frm_trace("si.inst id=%lld cu=%d wf=%d uop_id=%lld "
			"stg=\"mem-m\"\n", uop->id_in_sm, 
			vector_mem->sm->id, uop->warp->id, 
			uop->id_in_warp);
	}
}
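The access-kind selection above maps a write with the glc (globally coherent) flag cleared to a non-coherent store, a write with it set to a coherent store, and a read to a load. A self-contained sketch of that dispatch; the enum only mirrors the shape of mod_access_kind_t, not its actual definition:

#include <stdio.h>
#include <stdlib.h>

enum access_kind { ACCESS_LOAD, ACCESS_STORE, ACCESS_NC_STORE };

static enum access_kind select_access_kind(int is_write, int is_read, int glc)
{
    if (is_write && !glc)
        return ACCESS_NC_STORE;   /* non-coherent store: bypasses coherence */
    if (is_write && glc)
        return ACCESS_STORE;      /* globally coherent store */
    if (is_read)
        return ACCESS_LOAD;
    fprintf(stderr, "invalid access kind\n");
    exit(1);
}

int main(void)
{
    printf("%d %d %d\n",
           select_access_kind(1, 0, 0),    /* 2: nc store */
           select_access_kind(1, 0, 1),    /* 1: store */
           select_access_kind(0, 1, 0));   /* 0: load */
    return 0;
}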
Example #11
/*
 * ACC call #2 - accArith
 *
 * accArith - Arithmetic calculation for the Whetstone benchmark
 *
 * @return
 *	The function always returns 0 if running properly;
 *	returns -1 if an illegal input value is detected
 */
static int x86_acc_func_accArith (struct x86_ctx_t *ctx)
{
	int core = 0; 
	int thread = 0;

	struct x86_regs_t *regs = ctx->regs;
	struct mem_t *mem = ctx->mem;


	unsigned int args_ptr;
	double func_args[4];
	/* Read arguments */
	args_ptr = regs->ecx;
	printf ("args_ptr = %u(0x%x)\n\n",args_ptr,args_ptr);
	func_args[0] = 1.0;
	func_args[1] = -1.0;
	func_args[2] = -1.0;
	func_args[3] = -1.0;

	/* Get function info */
	//mem_read(mem, args_ptr, sizeof(double), func_args);

/*
	mem_read(mem, args_ptr+4, 8, func_args[1] );
	mem_read(mem, args_ptr+8, 8, func_args[2] );
	mem_read(mem, args_ptr+12, 8, func_args[3] );
	mem_read(mem, args_ptr+16, 8, func_args[4] );
*/
	//func_args[0] = &args_ptr;
	//func_args[1] = &args_ptr+8;
	//func_args[2] = &args_ptr+16;
	//func_args[3] = &args_ptr+24;
	//func_args[4] = &args_ptr+60;
	printf("*******************************\n");
	printf("In Emulation\n");
	//printf("\t\tfunc_args = %u (0x%x)\n", func_args, func_args);
	printf ("Cycle when getting into this call is %lld\n\n", x86_cpu->cycle); 

	/***********************************************/


	struct linked_list_t *sq = X86_THREAD.sq;
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *store;
	struct x86_uop_t *load;
	int quant = x86_cpu_issue_width;
	

	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quant)
	{
		store = linked_list_get(sq);
		printf ("physical addr @ store: %d\n",store->phy_addr);
		//assert(store->uinst->opcode == x86_uinst_store);
		if (!store->ready && !x86_reg_file_ready(store))
		{
			linked_list_next(sq);
			continue;
		}


		store->ready = 1;
		//printf ("physical add: %d\n",load->phy_addr);
		if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
		{
			//printf("Debug Point 5\n");
			linked_list_next(sq);
			continue;
		}
		
		int i = 9000;
		while (i--)
		{
			//printf("Debug Point 6\n");
			mod_access(X86_THREAD.data_mod, mod_access_store,
				store->phy_addr, NULL, X86_CORE.event_queue, store);
		}
		quant--;
		
		// MMU statistics
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);

	}

	quant = x86_cpu_issue_width;
	
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		load = linked_list_get(lq);
		printf ("physical addr @ load: %d\n", load->phy_addr);
		//assert(load->uinst->opcode == x86_uinst_load);
		if (!load->ready && !x86_reg_file_ready(load))
		{
			printf("load debug point 1\n");
			linked_list_next(lq);	/* advance the load queue, not the store queue */
			continue;
		}


		load->ready = 1;
		//printf ("physical add: %d\n",load->phy_addr);
		if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
		{
			printf("load debug point 2\n");
			linked_list_next(lq);
			continue;
		}
		
		int j = 9000;
		while (j--)
		{
			//printf("load debug point 3\n");
			mod_access(X86_THREAD.data_mod, mod_access_load,
				load->phy_addr, NULL, X86_CORE.event_queue, load);
		}
		quant--;

		//printf("load debug point 4: quant = %d\n", quant);

		// MMU statistics
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		// Trace
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, load->core);
	}



	/***********************************************/
	
/*
	printf("\t\tfunc_args[0] = %u (0x%x)\n", func_args[0], func_args[0]);
	printf("\t\tfunc_args[1] = %u (0x%x)\n", func_args[1], func_args[1]);
	printf("\t\tfunc_args[2] = %u (0x%x)\n", func_args[2], func_args[2]);
	printf("\t\tfunc_args[3] = %u (0x%x)\n", func_args[3], func_args[3]);
	printf("\t\tfunc_args[4] = %u (0x%x)\n", func_args[4], func_args[4]);

	printf("get here 1\n");
*/
/*
	printf("Value:\n\t\tfunc_args[0] = %f (0x%x)\n", *func_args[0], *func_args[0]);
	printf("\t\tfunc_args[1] = %f (0x%x)\n", *func_args[1], *func_args[1]);
	printf("\t\tfunc_args[2] = %f (0x%x)\n", *func_args[2], *func_args[2]);
	printf("\t\tfunc_args[3] = %f (0x%x)\n", *func_args[3], *func_args[3]);
	printf("\t\tfunc_args[4] = %f (0x%x)\n", *func_args[4], *func_args[4]);
*/
/*
	double *A0 = func_args[0];
	double *A1 = func_args[1];
	double *A2 = func_args[2];
	double *A3 = func_args[3];
	double *A4 = func_args[4];
*/
	//double N = *A0;

	//printf("get here\n");
/*
	printf("\t\tN  = %f (0x%x)\n", *A0,*A0);
	printf("\t\tA1 = %f (0x%x)\n", *A1,*A1);
	printf("\t\tA2 = %f (0x%x)\n", *A2,*A2);
	printf("\t\tA3 = %f (0x%x)\n", *A3,*A3);
	printf("\t\tA4 = %f (0x%x)\n", *A4,*A4);
*/

	double T = 0.499975;

	printf ("func_args1 = %f, func_args2 = %f, func_args3 = %f, func_args4 = %f\n",func_args[0],func_args[1],func_args[2],func_args[3]);

/*
	func_args[0] = (func_args[0] + func_args[1] + func_args[2] - func_args[3])*T;
	func_args[1] = (func_args[0] + func_args[1] - func_args[2] + func_args[3])*T;
	func_args[2] = (func_args[0] - func_args[1] + func_args[2] - func_args[3])*T;
	func_args[3] = (-func_args[0] + func_args[1] + func_args[2] + func_args[3])*T;
*/
	return 0;
}
Example #12
static int x86_acc_func_accDTW (struct x86_ctx_t *ctx)
{
	struct x86_regs_t *regs = ctx->regs;
	struct mem_t *mem = ctx->mem;
	int core = 0; 
	int thread = 0;
	
	unsigned int args_ptr;
	//int x;
	struct arglist func_args;
	/* Read arguments */
	args_ptr = regs->ecx;


	/* Get function info */
	mem_read(mem, args_ptr, sizeof(struct arglist), &func_args);
	printf("\t\t**sample1 = %p (%p)\n", func_args.sample1, &(func_args.sample1[0][0]));
	printf("\t\tlength1      = %u (%p)\n", func_args.length1, &func_args.length1);
	printf("\t\t**sample2 = %p (%p)\n", func_args.sample2, &(func_args.sample2[0][0]));
	printf("\t\tlength2      = %u (%p)\n", func_args.length2, &func_args.length2);
	printf("\t\ti               = %u (%p)\n", func_args.i, &func_args.i);
	printf("\t\tj               = %u (%p)\n", func_args.j, &func_args.j);
	printf("\t\t*table       = %p (%p)\n", func_args.table, &(func_args.table[0]));

	/***********************************************/

#define L2ONLY
#define WITHACC


#ifdef L2ONLY
	printf ("Cache Behavior Simulation\n");
	char * mod_name = "mod-l2-0";
	X86_THREAD.data_mod = mem_system_get_mod (mod_name);
#endif

#ifdef WITHACC
	struct linked_list_t *sq = X86_THREAD.sq;
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *store;
	struct x86_uop_t *load;
	int quant = x86_cpu_issue_width;
	unsigned int count1, count2;

	for (count1 = 0; count1 < 124; count1 ++)
	{
		for(count2 = 0; count2 < 124; count2++)
		{
			linked_list_head(sq);
			while (!linked_list_is_end(sq) && quant)
			{
				store = linked_list_get(sq);
				printf("\n\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n\n");
				printf("physical addr @ store: %d\n",store->phy_addr);
				assert(store->uinst->opcode == x86_uinst_store);
				if (!store->ready && !x86_reg_file_ready(store))
				{
					linked_list_next(sq);
					continue;
				}


				store->ready = 1;
				printf ("data module kind: %d\n",X86_THREAD.data_mod->kind);
				printf ("data module level: %d\n",X86_THREAD.data_mod->level);
				printf ("data module name: %s\n",X86_THREAD.data_mod->name);
				printf ("data module cache name: %s\n",X86_THREAD.data_mod->cache->name);
				if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
				{
					linked_list_next(sq);
					continue;
				}
		
				int i = 3;
				while (i--)
				{
					printf("Store Debug Point 6\n");
					mod_access(X86_THREAD.data_mod, mod_access_store,
						store->phy_addr, NULL, X86_CORE.event_queue, store);
				}
				quant--;
		
				// MMU statistics
				if (*mmu_report_file_name)
					mmu_access_page(store->phy_addr, mmu_access_write);

			}

			quant = x86_cpu_issue_width;

			//printf("Load Simulation ... \n");	
			linked_list_head(lq);
			while (!linked_list_is_end(lq) && quant)
			{
				load = linked_list_get(lq);
				printf ("physical addr @ load: %d\n", load->phy_addr);
				assert(load->uinst->opcode == x86_uinst_load);
				if (!load->ready && !x86_reg_file_ready(load))
				{
					printf("load debug point 1\n");
					linked_list_next(lq);	/* advance the load queue, not the store queue */
					continue;
				}


				load->ready = 1;
				//printf ("physical add: %d\n",load->phy_addr);
				if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
				{
					printf("load debug point 2\n");
					linked_list_next(lq);
					continue;
				}
		
				int j = 1;
				while (j--)
				{
					printf("load debug point 3\n");
					mod_access(X86_THREAD.data_mod, mod_access_load,
						load->phy_addr, NULL, X86_CORE.event_queue, load);
				}
				quant--;

				//printf("load debug point 4: quant = %d\n", quant);

				// MMU statistics
				if (*mmu_report_file_name)
					mmu_access_page(load->phy_addr, mmu_access_read);

				// Trace
				x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
					load->id_in_core, load->core);
			}
		}
	}
#endif

#ifdef L2ONLY
	mod_name = "mod-dl1-0";
	X86_THREAD.data_mod = mem_system_get_mod (mod_name);
#endif

	/***********************************************/
	int ret = DTWdistance(func_args.sample1, func_args.length1, func_args.sample2, 
		func_args.length2, func_args.i, func_args.j, func_args.table);
	return ret;

}
Example #13
static void X86ThreadFetch(X86Thread *self) {
	X86Context *ctx = self->ctx;
	struct x86_uop_t *uop;

	unsigned int phy_addr;
	unsigned int block;
	unsigned int target;

	int taken;

	/* Try to fetch from trace cache first */
	if (x86_trace_cache_present && X86ThreadFetchTraceCache(self))
		return;

	/* If new block to fetch is not the same as the previously fetched (and stored)
	 * block, access the instruction cache. */
	block = self->fetch_neip & ~(self->inst_mod->block_size - 1);
	if (block != self->fetch_block) {

		phy_addr = mmu_translate(self->ctx->address_space_index,
				self->fetch_neip);

		self->fetch_block = block;
		self->fetch_address = phy_addr;
		self->fetch_access = mod_access(self->inst_mod, mod_access_load,
				phy_addr, NULL, NULL, NULL,
				NULL, self->inst_latency, 4);
		self->btb_reads++;

		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(phy_addr, mmu_access_execute);
	}

	/* Fetch all instructions within the block up to the first predict-taken branch. */
	while ((self->fetch_neip & ~(self->inst_mod->block_size - 1)) == block) {
		/* If instruction caused context to suspend or finish */
		if (!X86ContextGetState(ctx, X86ContextRunning))
			break;

		/* If fetch queue full, stop fetching */
		if (self->fetchq_occ >= x86_fetch_queue_size)
			break;

		/* Insert macro-instruction into the fetch queue. Since the macro-instruction
		 * information is only available at this point, we use it to decode
		 * instruction now and insert uops into the fetch queue. However, the
		 * fetch queue occupancy is increased with the macro-instruction size. */
		uop = X86ThreadFetchInst(self, 0);
		if (!ctx->inst.size) /* x86_isa_inst invalid - no forward progress in loop */
			break;
		if (!uop) /* no uop was produced by this macro-instruction */
			continue;

		/* Instruction detected as branches by the BTB are checked for branch
		 * direction in the branch predictor. If they are predicted taken,
		 * stop fetching from this block and set new fetch address. */
		if (uop->flags & X86_UINST_CTRL) {
			target = X86ThreadLookupBTB(self, uop);
			taken = target && X86ThreadLookupBranchPred(self, uop);
			if (taken) {
				self->fetch_neip = target;
				uop->pred_neip = target;
				break;
			}
		}
	}
}
Example #14
void Memory_Drived_Prefetch(X86Thread *self, X86Context *context)
{
    if (!self)
    {
        return;
    }


    /* Prefetch only on behalf of a context that is not currently running */
    if (X86ContextGetState(context, X86ContextRunning))
    {
        return;
    }

	X86Core *core = self->core;
	int context_id = context->pid;
	/* Replay the context's recorded memory-access patterns as prefetches */

    struct x86_stride_pattern_t *StrideSummary = context->MemorySummary.stride_pattern_log;
    struct x86_MRU_pattern_t *MRU_I_Summary = context->MemorySummary.MRU_Instruction_log;
    struct x86_MRU_pattern_t *MRU_D_Summary = context->MemorySummary.MRU_Data_log;

    /*Prefetch Stride Pattern Data*/
    for (int i = 0; i < MAX_PATTERN_COUNT; ++i)
    {
        if (StrideSummary[i].InitialAddress == 0)
        {
            continue;
        }

    	if (StrideSummary[i].stride * StrideSummary[i].instruction_address_count < L1_BLOCK_SIZE)
    	{
    		mod_access(self->data_mod, mod_access_load,
                   StrideSummary[i].InitialAddress, NULL, NULL, NULL, NULL);
    	}
    	else
    	{
    		int blocks = (int) (StrideSummary[i].stride * StrideSummary[i].instruction_address_count / L1_BLOCK_SIZE);

    		/* Use a separate index so the pattern index 'i' is not shadowed */
    		for (int b = 0; b < blocks; ++b)
    		{
    			mod_access(self->data_mod, mod_access_load,
                   StrideSummary[i].InitialAddress + b * L1_BLOCK_SIZE, NULL, NULL, NULL, NULL);
    		}
    	}
    }

    /*Prefetch MRU Data*/
    for (int i = 0; i < MAX_PATTERN_COUNT; ++i)
    {
		for (int way = 0; way < MRU_ASSOCIATIVITY; ++way)
		{
			if (MRU_D_Summary[i].tag[way] != 0)
			{
				mod_access(self->data_mod, mod_access_load,
               				MRU_D_Summary[i].tag[way], NULL, NULL, NULL, NULL);
			}
		}
    }

    /*Prefetch MRU Instruction*/
    for (int i = 0; i < MAX_PATTERN_COUNT; ++i)
    {
		for (int way = 0; way < MRU_ASSOCIATIVITY; ++way)
		{
			if (MRU_I_Summary[i].tag[way] != 0)
			{
				mod_access(self->inst_mod, mod_access_load,
               				MRU_I_Summary[i].tag[way], NULL, NULL, NULL, NULL);
			}
		}
    }
}
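The stride loop above covers a footprint of stride * count bytes with one prefetch per L1 block. A standalone sketch of that arithmetic, rounding up so a partially covered trailing block is still fetched; L1_BLOCK_SIZE = 64 is an assumed value:

#include <stdio.h>

#define L1_BLOCK_SIZE 64

static int prefetch_block_count(int stride, int count)
{
    int footprint = stride * count;          /* bytes touched by the pattern */
    if (footprint < L1_BLOCK_SIZE)
        return 1;                            /* one block covers it all */
    /* Round up so a partially covered trailing block is still fetched */
    return (footprint + L1_BLOCK_SIZE - 1) / L1_BLOCK_SIZE;
}

int main(void)
{
    printf("%d\n", prefetch_block_count(8, 4));     /* prints 1 */
    printf("%d\n", prefetch_block_count(16, 20));   /* prints 5 */
    return 0;
}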
Example #15
static int x86_cpu_issue_lq(int core, int thread, int quant)
{
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *load;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !x86_reg_file_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		x86_lq_remove(core, thread);

		/* Access memory system */
		mod_access(X86_THREAD.data_mod, mod_access_load,
			load->phy_addr, NULL, X86_CORE.event_queue, load);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = x86_cpu->cycle;
		
		/* Instruction issued */
		X86_CORE.issued[load->uinst->opcode]++;
		X86_CORE.lsq_reads++;
		X86_CORE.reg_file_int_reads += load->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += load->ph_fp_idep_count;
		X86_THREAD.issued[load->uinst->opcode]++;
		X86_THREAD.lsq_reads++;
		X86_THREAD.reg_file_int_reads += load->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += load->ph_fp_idep_count;
		x86_cpu->issued[load->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, load->core);
	}
	
	return quant;
}
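One last idiom worth noting: throughout these examples, MMU statistics are gated on "if (*mmu_report_file_name)", i.e. on whether the first character of the configured report file name is non-zero. A self-contained sketch of that gating; mmu_access_page here is a stand-in, not the real function:

#include <stdio.h>

static char mmu_report_file_name[256] = "";   /* empty string = reporting disabled */

static void mmu_access_page(unsigned int phy_addr, int access_kind)
{
    printf("page access: 0x%x kind=%d\n", phy_addr, access_kind);
}

int main(void)
{
    unsigned int phy_addr = 0x1000;

    if (*mmu_report_file_name)          /* first char is '\0': skipped */
        mmu_access_page(phy_addr, 0);

    snprintf(mmu_report_file_name, sizeof mmu_report_file_name, "mmu.report");
    if (*mmu_report_file_name)          /* now enabled: runs */
        mmu_access_page(phy_addr, 0);
    return 0;
}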