コード例 #1
0
/* Remove all speculative uops from a given load/store queue in the
 * given thread. */
void X86ThreadRecoverLSQ(X86Thread *self) {
  struct linked_list_t *lq = self->lq;
  struct linked_list_t *sq = self->sq;
  struct x86_uop_t *uop;

  /* Recover load queue */
  linked_list_head(lq);
  while (!linked_list_is_end(lq)) {
    uop = linked_list_get(lq);
    if (uop->specmode) {
      X86ThreadRemoveFromLQ(self);
      x86_uop_free_if_not_queued(uop);
      continue;
    }
    linked_list_next(lq);
  }

  /* Recover store queue */
  linked_list_head(sq);
  while (!linked_list_is_end(sq)) {
    uop = linked_list_get(sq);
    if (uop->specmode) {
      X86ThreadRemoveFromSQ(self);
      x86_uop_free_if_not_queued(uop);
      continue;
    }
    linked_list_next(sq);
  }
}
コード例 #2
0
ファイル: issue.c プロジェクト: xianggong/multi2sim42
static int X86ThreadIssueSQ(X86Thread *self, int quantum) {
  X86Cpu *cpu = self->cpu;
  X86Core *core = self->core;

  struct x86_uop_t *store;
  struct linked_list_t *sq = self->sq;
  struct mod_client_info_t *client_info;

  /* Process SQ */
  linked_list_head(sq);
  while (!linked_list_is_end(sq) && quantum) {
    /* Get store */
    store = linked_list_get(sq);
    assert(store->uinst->opcode == x86_uinst_store);

    /* Only committed stores issue */
    if (store->in_rob) break;

    /* Check that memory system entry is ready */
    if (!mod_can_access(self->data_mod, store->phy_addr)) break;

    /* Remove store from store queue */
    X86ThreadRemoveFromSQ(self);

    /* create and fill the mod_client_info_t object */
    client_info = mod_client_info_create(self->data_mod);
    client_info->prefetcher_eip = store->eip;

    /* Issue store */
    mod_access(self->data_mod, mod_access_store, store->phy_addr, NULL,
               core->event_queue, store, client_info);

    /* The cache system will place the store at the head of the
     * event queue when it is ready. For now, mark "in_event_queue" to
     * prevent the uop from being freed. */
    store->in_event_queue = 1;
    store->issued = 1;
    store->issue_when = asTiming(cpu)->cycle;

    /* Statistics */
    core->num_issued_uinst_array[store->uinst->opcode]++;
    core->lsq_reads++;
    core->reg_file_int_reads += store->ph_int_idep_count;
    core->reg_file_fp_reads += store->ph_fp_idep_count;
    self->num_issued_uinst_array[store->uinst->opcode]++;
    self->lsq_reads++;
    self->reg_file_int_reads += store->ph_int_idep_count;
    self->reg_file_fp_reads += store->ph_fp_idep_count;
    cpu->num_issued_uinst_array[store->uinst->opcode]++;
    if (store->trace_cache) self->trace_cache->num_issued_uinst++;

    /* One more instruction, update quantum. */
    quantum--;

    /* MMU statistics */
    if (*mmu_report_file_name)
      mmu_access_page(store->phy_addr, mmu_access_write);
  }
  return quantum;
}
コード例 #3
0
ファイル: stg-issue.c プロジェクト: abhaykadam/vm
static int issue_sq(int core, int thread, int quant)
{
	struct uop_t *store;
	struct linked_list_t *sq = THREAD.sq;

	/* Process SQ */
	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quant)
	{
		/* Get store */
		store = linked_list_get(sq);
		assert(store->uinst->opcode == x86_uinst_store);

		/* Only committed stores issue */
		if (store->in_rob)
			break;

		/* Check that memory system entry is ready */
		if (!mod_can_access(THREAD.data_mod, store->phy_addr))
			break;

		/* Remove store from store queue */
		sq_remove(core, thread);

		/* Issue store */
		mod_access(THREAD.data_mod, mod_entry_cpu, mod_access_write,
			store->phy_addr, NULL, CORE.eventq, store);

		/* The cache system will place the store at the head of the
		 * event queue when it is ready. For now, mark "in_eventq" to
		 * prevent the uop from being freed. */
		store->in_eventq = 1;
		store->issued = 1;
		store->issue_when = cpu->cycle;
	
		/* Instruction issued */
		CORE.issued[store->uinst->opcode]++;
		CORE.lsq_reads++;
		CORE.rf_int_reads += store->ph_int_idep_count;
		CORE.rf_fp_reads += store->ph_fp_idep_count;
		THREAD.issued[store->uinst->opcode]++;
		THREAD.lsq_reads++;
		THREAD.rf_int_reads += store->ph_int_idep_count;
		THREAD.rf_fp_reads += store->ph_fp_idep_count;
		cpu->issued[store->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);

		/* Debug */
		esim_debug("uop action=\"update\", core=%d, seq=%llu,"
			" stg_issue=1, in_lsq=0, issued=1\n",
			store->core, (long long unsigned) store->di_seq);
	}
	return quant;
}
コード例 #4
0
ファイル: stg-issue.c プロジェクト: ajdupree/cs316-m2s
static int x86_cpu_issue_sq(int core, int thread, int quant)
{
	struct x86_uop_t *store;
	struct linked_list_t *sq = X86_THREAD.sq;

	/* Process SQ */
	linked_list_head(sq);
	while (!linked_list_is_end(sq) && quant)
	{
		/* Get store */
		store = linked_list_get(sq);
		assert(store->uinst->opcode == x86_uinst_store);

		/* Only committed stores issue */
		if (store->in_rob)
			break;

		/* Check that memory system entry is ready */
		if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
			break;

		/* Remove store from store queue */
		x86_sq_remove(core, thread);

		/* Issue store */
		mod_access(X86_THREAD.data_mod, mod_access_store,
			store->phy_addr, NULL, X86_CORE.event_queue, store);

		/* The cache system will place the store at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		store->in_event_queue = 1;
		store->issued = 1;
		store->issue_when = x86_cpu->cycle;
	
		/* Instruction issued */
		X86_CORE.issued[store->uinst->opcode]++;
		X86_CORE.lsq_reads++;
		X86_CORE.reg_file_int_reads += store->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += store->ph_fp_idep_count;
		X86_THREAD.issued[store->uinst->opcode]++;
		X86_THREAD.lsq_reads++;
		X86_THREAD.reg_file_int_reads += store->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += store->ph_fp_idep_count;
		x86_cpu->issued[store->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);
	}
	return quant;
}
コード例 #5
0
ファイル: uop.c プロジェクト: abhaykadam/vm
/* Update 'uop->ready' field of all instructions in a list as per the result
 * obtained by 'rf_ready'. The 'uop->ready' field is redundant and should always
 * match the return value of 'rf_ready' while an uop is in the ROB.
 * A debug message is dumped when the uop transitions to ready. */
void uop_lnlist_check_if_ready(struct linked_list_t *uop_list)
{
	struct uop_t *uop;
	linked_list_head(uop_list);
	for (linked_list_head(uop_list); !linked_list_is_end(uop_list); linked_list_next(uop_list)) {
		uop = linked_list_get(uop_list);
		if (uop->ready || !rf_ready(uop))
			continue;
		uop->ready = 1;
		esim_debug("uop action=\"update\", core=%d, seq=%lld, ready=1\n",
			uop->core, uop->di_seq);
	}
}
コード例 #6
0
ファイル: uop.c プロジェクト: 3upperm2n/gpuSimulators
void x86_uop_linked_list_dump(struct linked_list_t *uop_list, FILE *f)
{
	struct x86_uop_t *uop;
	
	linked_list_head(uop_list);
	while (!linked_list_is_end(uop_list))
	{
		uop = linked_list_get(uop_list);
		fprintf(f, "%3d. ", linked_list_current(uop_list));
		x86_uinst_dump(uop->uinst, f);
		fprintf(f, "\n");
		linked_list_next(uop_list);
	}
}
コード例 #7
0
void X86ThreadRecoverEventQueue(X86Thread *self)
{
	X86Core *core = self->core;

	struct linked_list_t *event_queue = core->event_queue;
	struct x86_uop_t *uop;

	linked_list_head(event_queue);
	while (!linked_list_is_end(event_queue))
	{
		uop = linked_list_get(event_queue);
		if (uop->thread == self && uop->specmode)
		{
			linked_list_remove(event_queue);
			uop->in_event_queue = 0;
			x86_uop_free_if_not_queued(uop);
			continue;
		}
		linked_list_next(event_queue);
	}
}
コード例 #8
0
ファイル: stg-issue.c プロジェクト: abhaykadam/vm
static int issue_iq(int core, int thread, int quant)
{
	struct linked_list_t *iq = THREAD.iq;
	struct uop_t *uop;
	int lat;

	/* Debug */
	if (esim_debug_file)
		uop_lnlist_check_if_ready(iq);
	
	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant) {
		
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !rf_ready(uop)) {
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'rf_ready' */
		
		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit, 'fu_reserve'
		 * returns 1 cycle latency. If there is no functional unit available,
		 * 'fu_reserve' returns 0. */
		lat = fu_reserve(uop);
		if (!lat) {
			linked_list_next(iq);
			continue;
		}
		
		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		iq_remove(core, thread);
		
		/* Schedule inst in Event Queue */
		assert(!uop->in_eventq);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = cpu->cycle;
		uop->when = cpu->cycle + lat;
		eventq_insert(CORE.eventq, uop);
		
		/* Instruction issued */
		CORE.issued[uop->uinst->opcode]++;
		CORE.iq_reads++;
		CORE.rf_int_reads += uop->ph_int_idep_count;
		CORE.rf_fp_reads += uop->ph_fp_idep_count;
		THREAD.issued[uop->uinst->opcode]++;
		THREAD.iq_reads++;
		THREAD.rf_int_reads += uop->ph_int_idep_count;
		THREAD.rf_fp_reads += uop->ph_fp_idep_count;
		cpu->issued[uop->uinst->opcode]++;
		quant--;

		/* Debug */
		esim_debug("uop action=\"update\", core=%d, seq=%llu,"
			" stg_issue=1, in_iq=0, issued=1\n",
			uop->core, (long long unsigned) uop->di_seq);
	}
	
	return quant;
}
コード例 #9
0
ファイル: issue.c プロジェクト: 3upperm2n/gpuSimulators
static int X86ThreadIssueIQ(X86Thread *self, int quant)
{
	X86Cpu *cpu = self->cpu;
	X86Core *core = self->core;

	struct linked_list_t *iq = self->iq;
	struct x86_uop_t *uop;
	int lat;

	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant)
	{
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(x86_uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !X86ThreadIsUopReady(self, uop))
		{
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'X86ThreadIsUopReady' */
		
		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit, 'X86CoreReserveFunctionalUnit'
		 * returns 1 cycle latency. If there is no functional unit available,
		 * 'X86CoreReserveFunctionalUnit' returns 0. */
		lat = X86CoreReserveFunctionalUnit(core, uop);
		if (!lat)
		{
			linked_list_next(iq);
			continue;
		}
		
		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		X86ThreadRemoveFromIQ(self);
		
		/* Schedule inst in Event Queue */
		assert(!uop->in_event_queue);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = asTiming(cpu)->cycle;
		uop->when = asTiming(cpu)->cycle + lat;
		X86CoreInsertInEventQueue(core, uop);
		
		/* Statistics */
		core->num_issued_uinst_array[uop->uinst->opcode]++;
		core->iq_reads++;
		core->reg_file_int_reads += uop->ph_int_idep_count;
		core->reg_file_fp_reads += uop->ph_fp_idep_count;
		self->num_issued_uinst_array[uop->uinst->opcode]++;
		self->iq_reads++;
		self->reg_file_int_reads += uop->ph_int_idep_count;
		self->reg_file_fp_reads += uop->ph_fp_idep_count;
		cpu->num_issued_uinst_array[uop->uinst->opcode]++;
		if (uop->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quant--;

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			uop->id_in_core, core->id);
	}
	
	return quant;
}
コード例 #10
0
ファイル: issue.c プロジェクト: 3upperm2n/gpuSimulators
static int X86ThreadIssuePreQ(X86Thread *self, int quantum)
{
	X86Core *core = self->core;
	X86Cpu *cpu = self->cpu;

	struct linked_list_t *preq = self->preq;
	struct x86_uop_t *prefetch;

	/* Process preq */
	linked_list_head(preq);
	while (!linked_list_is_end(preq) && quantum)
	{
		/* Get element from prefetch queue. If it is not ready, go to the next one */
		prefetch = linked_list_get(preq);
		if (!prefetch->ready && !X86ThreadIsUopReady(self, prefetch))
		{
			linked_list_next(preq);
			continue;
		}

		/* 
		 * Make sure its not been prefetched recently. This is just to avoid unnecessary
		 * memory traffic. Even though the cache will realise a "hit" on redundant 
		 * prefetches, its still helpful to avoid going to the memory (cache). 
		 */
		if (prefetch_history_is_redundant(core->prefetch_history,
							   self->data_mod, prefetch->phy_addr))
		{
			/* remove from queue. do not prefetch. */
			assert(prefetch->uinst->opcode == x86_uinst_prefetch);
			X86ThreadRemovePreQ(self);
			prefetch->completed = 1;
			x86_uop_free_if_not_queued(prefetch);
			continue;
		}

		prefetch->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(self->data_mod, prefetch->phy_addr))
		{
			linked_list_next(preq);
			continue;
		}

		/* Remove from prefetch queue */
		assert(prefetch->uinst->opcode == x86_uinst_prefetch);
		X86ThreadRemovePreQ(self);

		/* Access memory system */
		mod_access(self->data_mod, mod_access_prefetch,
			prefetch->phy_addr, NULL, core->event_queue, prefetch, NULL);

		/* Record prefetched address */
		prefetch_history_record(core->prefetch_history, prefetch->phy_addr);

		/* The cache system will place the prefetch at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		prefetch->in_event_queue = 1;
		prefetch->issued = 1;
		prefetch->issue_when = asTiming(cpu)->cycle;
		
		/* Statistics */
		core->num_issued_uinst_array[prefetch->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += prefetch->ph_int_idep_count;
		core->reg_file_fp_reads += prefetch->ph_fp_idep_count;
		self->num_issued_uinst_array[prefetch->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += prefetch->ph_int_idep_count;
		self->reg_file_fp_reads += prefetch->ph_fp_idep_count;
		cpu->num_issued_uinst_array[prefetch->uinst->opcode]++;
		if (prefetch->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quantum--;
		
		/* MMU statistics */
		MMUAccessPage(cpu->mmu, prefetch->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			prefetch->id_in_core, core->id);
	}
	
	return quantum;
}
コード例 #11
0
ファイル: issue.c プロジェクト: 3upperm2n/gpuSimulators
static int X86ThreadIssueLQ(X86Thread *self, int quant)
{
	X86Core *core = self->core;
	X86Cpu *cpu = self->cpu;

	struct linked_list_t *lq = self->lq;
	struct x86_uop_t *load;
	struct mod_client_info_t *client_info;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !X86ThreadIsUopReady(self, load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(self->data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		X86ThreadRemoveFromLQ(self);

		/* create and fill the mod_client_info_t object */
		client_info = mod_client_info_create(self->data_mod);
		client_info->prefetcher_eip = load->eip;

		/* Access memory system */
		mod_access(self->data_mod, mod_access_load,
			load->phy_addr, NULL, core->event_queue, load, client_info);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = asTiming(cpu)->cycle;
		
		/* Statistics */
		core->num_issued_uinst_array[load->uinst->opcode]++;
		core->lsq_reads++;
		core->reg_file_int_reads += load->ph_int_idep_count;
		core->reg_file_fp_reads += load->ph_fp_idep_count;
		self->num_issued_uinst_array[load->uinst->opcode]++;
		self->lsq_reads++;
		self->reg_file_int_reads += load->ph_int_idep_count;
		self->reg_file_fp_reads += load->ph_fp_idep_count;
		cpu->num_issued_uinst_array[load->uinst->opcode]++;
		if (load->trace_cache)
			self->trace_cache->num_issued_uinst++;

		/* One more instruction issued, update quantum. */
		quant--;
		
		/* MMU statistics */
		MMUAccessPage(cpu->mmu, load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, core->id);
	}
	
	return quant;
}
コード例 #12
0
ファイル: acc.c プロジェクト: Peilong/multi2sim-4.0-hc
/*
 * ACC call #2 - accArith
 *
 * accArith - Arithmatic calculation for Whetstone Benchmark
 *
 * @return
 *	The function always returns 0 if running properly;
 *	ruturns -1 if illegal input value is detected
 */
static int x86_acc_func_accArith (struct x86_ctx_t *ctx)
{
	int core = 0; 
	int thread = 0;

	struct x86_regs_t *regs = ctx->regs;
	struct mem_t *mem = ctx->mem;


	unsigned int args_ptr;
	double func_args[4];
	/* Read arguments */
	args_ptr = regs->ecx;
	printf ("args_ptr = %u(0x%x)\n\n",args_ptr,args_ptr);
	func_args[0] = 1.0;
	func_args[1] = -1.0;
	func_args[2] = -1.0;
	func_args[3] = -1.0;

	/* Get function info */
	//mem_read(mem, args_ptr, sizeof(double), func_args);

/*
	mem_read(mem, args_ptr+4, 8, func_args[1] );
	mem_read(mem, args_ptr+8, 8, func_args[2] );
	mem_read(mem, args_ptr+12, 8, func_args[3] );
	mem_read(mem, args_ptr+16, 8, func_args[4] );
*/
	//func_args[0] = &args_ptr;
	//func_args[1] = &args_ptr+8;
	//func_args[2] = &args_ptr+16;
	//func_args[3] = &args_ptr+24;
	//func_args[4] = &args_ptr+60;
	printf("*******************************\n");
	printf("In Emulation\n");
	//printf("\t\tfunc_args = %u (0x%x)\n", func_args, func_args);
	printf ("Cycle when getting into this call is %lld\n\n", x86_cpu->cycle); 

	/***********************************************/


	struct linked_list_t *sq = X86_THREAD.sq;
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *store;
	struct x86_uop_t *load;
	int quant = x86_cpu_issue_width;
	

	linked_list_head(sq);
	while (!linked_list_is_end(sq)&& quant )
	{
		store = linked_list_get(sq);
		printf ("physical addr @ store: %d\n",store->phy_addr);
		//assert(store->uinst->opcode == x86_uinst_store);
		if (!store->ready && !x86_reg_file_ready(store))
		{
			linked_list_next(sq);
			continue;
		}


		store->ready = 1;
		//printf ("physical add: %d\n",load->phy_addr);
		if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
		{
			//printf("Debug Point 5\n");
			linked_list_next(sq);
			continue;
		}
		
		int i = 9000;
		while (i--)
		{
			//printf("Debug Point 6\n");
			mod_access(X86_THREAD.data_mod, mod_access_store,
				store->phy_addr, NULL, X86_CORE.event_queue, store);
		}
		quant--;
		
		// MMU statistics
		if (*mmu_report_file_name)
			mmu_access_page(store->phy_addr, mmu_access_write);

	}

	quant = x86_cpu_issue_width;
	
	linked_list_head(lq);
	while (!linked_list_is_end(lq)&& quant )
	{
		load = linked_list_get(lq);
		printf ("physical add @ load: %d\n",load->phy_addr);
		//assert(store->uinst->opcode == x86_uinst_store);
		load->ready = 1;
		if (!load->ready && !x86_reg_file_ready(load))
		{
			printf("load debug point 1\n");
			linked_list_next(sq);
			continue;
		}


		load->ready = 1;
		//printf ("physical add: %d\n",load->phy_addr);
		if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
		{
			printf("load debug point 2\n");
			linked_list_next(lq);
			continue;
		}
		
		int j = 9000;
		while (j--)
		{
			//printf("load debug point 3\n");
			mod_access(X86_THREAD.data_mod, mod_access_load,
				load->phy_addr, NULL, X86_CORE.event_queue, load);
		}
		quant--;

		//printf("load debug point 4: quant = %d\n", quant);

		// MMU statistics
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		// Trace
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, load->core);
	}



	/***********************************************/
	
/*
	printf("\t\tfunc_args[0] = %u (0x%x)\n", func_args[0], func_args[0]);
	printf("\t\tfunc_args[1] = %u (0x%x)\n", func_args[1], func_args[1]);
	printf("\t\tfunc_args[2] = %u (0x%x)\n", func_args[2], func_args[2]);
	printf("\t\tfunc_args[3] = %u (0x%x)\n", func_args[3], func_args[3]);
	printf("\t\tfunc_args[4] = %u (0x%x)\n", func_args[4], func_args[4]);

	printf("get here 1\n");
*/
/*
	printf("Value:\n\t\tfunc_args[0] = %f (0x%x)\n", *func_args[0], *func_args[0]);
	printf("\t\tfunc_args[1] = %f (0x%x)\n", *func_args[1], *func_args[1]);
	printf("\t\tfunc_args[2] = %f (0x%x)\n", *func_args[2], *func_args[2]);
	printf("\t\tfunc_args[3] = %f (0x%x)\n", *func_args[3], *func_args[3]);
	printf("\t\tfunc_args[4] = %f (0x%x)\n", *func_args[4], *func_args[4]);
*/
/*
	double *A0 = func_args[0];
	double *A1 = func_args[1];
	double *A2 = func_args[2];
	double *A3 = func_args[3];
	double *A4 = func_args[4];
*/
	//double N = *A0;

	//printf("get here\n");
/*
	printf("\t\tN  = %f (0x%x)\n", *A0,*A0);
	printf("\t\tA1 = %f (0x%x)\n", *A1,*A1);
	printf("\t\tA2 = %f (0x%x)\n", *A2,*A2);
	printf("\t\tA3 = %f (0x%x)\n", *A3,*A3);
	printf("\t\tA4 = %f (0x%x)\n", *A4,*A4);
*/

	double T = 0.499975;

	printf ("func_args1 = %f, func_args2 = %f, func_args3 = %f, func_args4 = %f\n",func_args[0],func_args[1],func_args[2],func_args[3]);

/*
	func_args[0] = (func_args[0] + func_args[1] + func_args[2] - func_args[3])*T;
	func_args[1] = (func_args[0] + func_args[1] - func_args[2] + func_args[3])*T;
	func_args[2] = (func_args[0] - func_args[1] + func_args[2] - func_args[3])*T;
	func_args[3] = (-func_args[0] + func_args[1] + func_args[2] + func_args[3])*T;
*/
	return 0;
}
コード例 #13
0
ファイル: faults.c プロジェクト: Peilong/multi2sim-4.0-hc
void evg_faults_insert(void)
{
	struct evg_fault_t *fault;
	struct evg_compute_unit_t *compute_unit;

	for (;;)
	{
		linked_list_head(evg_fault_list);
		fault = linked_list_get(evg_fault_list);
		if (!fault || fault->cycle > evg_gpu->cycle)
			break;

		/* Insert fault depending on fault type */
		switch (fault->type)
		{

		case evg_fault_ams:
		{
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;
			struct evg_work_item_t *work_item;

			int work_group_id;  /* in compute unit */
			int wavefront_id;  /* in compute unit */
			int value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"ams\" stack=%d am=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id, fault->stack_id,
				fault->active_mask_id, fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group and wavefront. If wavefront ID exceeds current number, dismiss */
			work_group_id = fault->stack_id / evg_gpu->ndrange->wavefronts_per_work_group;
			wavefront_id = fault->stack_id % evg_gpu->ndrange->wavefronts_per_work_group;
			if (work_group_id >= evg_gpu_max_work_groups_per_compute_unit
				|| !compute_unit->work_groups[work_group_id])
			{
				evg_faults_debug("effect=\"wf_idle\"");
				goto end_loop;
			}
			work_group = compute_unit->work_groups[work_group_id];
			wavefront = work_group->wavefronts[wavefront_id];

			/* If active_mask_id exceeds stack top, dismiss */
			if (fault->active_mask_id > wavefront->stack_top)
			{
				evg_faults_debug("effect=\"am_idle\"");
				goto end_loop;
			}

			/* If 'bit' exceeds number of work-items in wavefront, dismiss */
			if (fault->bit >= wavefront->work_item_count)
			{
				evg_faults_debug("effect=\"wi_idle\"");
				goto end_loop;
			}

			/* Fault caused an error, show affected software entities */
			work_item = wavefront->work_items[fault->bit];
			evg_faults_debug("effect=\"error\" wg=%d wf=%d wi=%d",
				work_group->id,
				wavefront->id,
				work_item->id);

			/* Inject fault */
			value = bit_map_get(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1);
			bit_map_set(wavefront->active_stack,
				fault->active_mask_id * wavefront->work_item_count
				+ fault->bit, 1, !value);
			evg_fault_errors++;

			break;
		}

		case evg_fault_reg:
		{
			struct evg_opencl_kernel_t *kernel = evg_gpu->ndrange->kernel;

			int work_group_id_in_compute_unit;
			struct evg_work_group_t *work_group;
			struct evg_wavefront_t *wavefront;

			int num_registers_per_work_group;

			int work_item_id_in_compute_unit;
			int work_item_id_in_work_group;
			struct evg_work_item_t *work_item;

			struct linked_list_t *fetch_queue;
			struct evg_uop_t *inst_buffer;
			struct evg_uop_t *exec_buffer;
			struct heap_t *event_queue;
			struct evg_uop_t *uop;

			int lo_reg;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"reg\" reg=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id,
				fault->reg_id,
				fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			num_registers_per_work_group = kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used
				* kernel->local_size;
			work_group_id_in_compute_unit = fault->reg_id / num_registers_per_work_group;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"reg_idle\"");
				goto end_loop;
			}

			/* Get affected entities */
			work_item_id_in_compute_unit = fault->reg_id
				/ kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;
			work_item_id_in_work_group = work_item_id_in_compute_unit % kernel->local_size;
			work_item = work_group->work_items[work_item_id_in_work_group];
			wavefront = work_item->wavefront;
			lo_reg = fault->reg_id % kernel->bin_file->enc_dict_entry_evergreen->num_gpr_used;

			/* Fault falling between Fetch and Read stage of an instruction
			 * consuming register. This case cannot be modeled due to functional
			 * simulation skew. */
			fetch_queue = compute_unit->alu_engine.fetch_queue;
			inst_buffer = compute_unit->alu_engine.inst_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_idep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_read\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_idep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_read\"");
				goto end_loop;
			}

			/* Fault falling between Fetch and Write stage of an instruction
			 * writing on the register. The instruction will overwrite the fault,
			 * so this shouldn't cause its injection. */
			exec_buffer = compute_unit->alu_engine.exec_buffer;
			for (linked_list_head(fetch_queue); !linked_list_is_end(fetch_queue);
				linked_list_next(fetch_queue))
			{
				uop = linked_list_get(fetch_queue);
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}
			uop = inst_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			uop = exec_buffer;
			if (uop && evg_stack_faults_is_odep(uop, wavefront, lo_reg))
			{
				evg_faults_debug("effect=\"reg_write\"");
				goto end_loop;
			}
			event_queue = compute_unit->alu_engine.event_queue;
			for (heap_first(event_queue, (void **) &uop); uop;
				heap_next(event_queue, (void **) &uop))
			{
				if (evg_stack_faults_is_odep(uop, wavefront, lo_reg))
				{
					evg_faults_debug("effect=\"reg_write\"");
					goto end_loop;
				}
			}

			/* Fault caused error */
			evg_faults_debug("effect=\"error\" ");
			evg_faults_debug("wg=%d wf=%d wi=%d lo_reg=%d ",
				work_group->id, work_item->wavefront->id, work_item->id, lo_reg);

			/* Insert the fault */
			if (fault->bit < 32)
				work_item->gpr[lo_reg].elem[0] ^= 1 << fault->bit;
			else if (fault->bit < 64)
				work_item->gpr[lo_reg].elem[1] ^= 1 << (fault->bit - 32);
			else if (fault->bit < 96)
				work_item->gpr[lo_reg].elem[2] ^= 1 << (fault->bit - 64);
			else
				work_item->gpr[lo_reg].elem[3] ^= 1 << (fault->bit - 96);
			evg_fault_errors++;

			break;

		}

		case evg_fault_mem:
		{
			struct evg_work_group_t *work_group;

			int work_group_id_in_compute_unit;
			unsigned char value;

			/* Initial debug */
			evg_faults_debug("fault clk=%lld cu=%d type=\"mem\" byte=%d bit=%d ",
				evg_gpu->cycle,
				fault->compute_unit_id,
				fault->byte,
				fault->bit);
			assert(fault->cycle == evg_gpu->cycle);
			compute_unit = evg_gpu->compute_units[fault->compute_unit_id];

			/* If compute unit is idle, dismiss */
			if (!compute_unit->work_group_count)
			{
				evg_faults_debug("effect=\"cu_idle\"");
				goto end_loop;
			}

			/* Check if there is any local memory used at all */
			if (!evg_gpu->ndrange->local_mem_top)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group */
			work_group_id_in_compute_unit = fault->byte / evg_gpu->ndrange->local_mem_top;
			if (work_group_id_in_compute_unit >= evg_gpu_max_work_groups_per_compute_unit)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Get work-group (again) */
			work_group = compute_unit->work_groups[work_group_id_in_compute_unit];
			if (!work_group)
			{
				evg_faults_debug("effect=\"mem_idle\"");
				goto end_loop;
			}

			/* Inject fault */
			evg_faults_debug("effect=\"error\" wg=%d ",
				work_group->id);
			mem_read(work_group->local_mem, fault->byte, 1, &value);
			value ^= 1 << fault->bit;
			mem_write(work_group->local_mem, fault->byte, 1, &value);
			evg_fault_errors++;

			break;

		}

		default:
			panic("invalid fault type");

		}

end_loop:
		/* Extract and free */
		free(fault);
		linked_list_remove(evg_fault_list);
		evg_faults_debug("\n");

		/* If all faults were inserted and no error was caused, end simulation */
		if (!linked_list_count(evg_fault_list) && !evg_fault_errors)
			esim_finish = esim_finish_evg_no_faults;
	}
}
コード例 #14
0
ファイル: stg-issue.c プロジェクト: ajdupree/cs316-m2s
static int x86_cpu_issue_lq(int core, int thread, int quant)
{
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *load;

	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !x86_reg_file_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(X86_THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		x86_lq_remove(core, thread);

		/* Access memory system */
		mod_access(X86_THREAD.data_mod, mod_access_load,
			load->phy_addr, NULL, X86_CORE.event_queue, load);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_event_queue" to
		 * prevent the uop from being freed. */
		load->in_event_queue = 1;
		load->issued = 1;
		load->issue_when = x86_cpu->cycle;
		
		/* Instruction issued */
		X86_CORE.issued[load->uinst->opcode]++;
		X86_CORE.lsq_reads++;
		X86_CORE.reg_file_int_reads += load->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += load->ph_fp_idep_count;
		X86_THREAD.issued[load->uinst->opcode]++;
		X86_THREAD.lsq_reads++;
		X86_THREAD.reg_file_int_reads += load->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += load->ph_fp_idep_count;
		x86_cpu->issued[load->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			load->id_in_core, load->core);
	}
	
	return quant;
}
コード例 #15
0
ファイル: stg-issue.c プロジェクト: ajdupree/cs316-m2s
static int x86_cpu_issue_iq(int core, int thread, int quant)
{
	struct linked_list_t *iq = X86_THREAD.iq;
	struct x86_uop_t *uop;
	int lat;

	/* Find instruction to issue */
	linked_list_head(iq);
	while (!linked_list_is_end(iq) && quant)
	{
		/* Get element from IQ */
		uop = linked_list_get(iq);
		assert(x86_uop_exists(uop));
		assert(!(uop->flags & X86_UINST_MEM));
		if (!uop->ready && !x86_reg_file_ready(uop))
		{
			linked_list_next(iq);
			continue;
		}
		uop->ready = 1;  /* avoid next call to 'x86_reg_file_ready' */
		
		/* Run the instruction in its corresponding functional unit.
		 * If the instruction does not require a functional unit, 'x86_fu_reserve'
		 * returns 1 cycle latency. If there is no functional unit available,
		 * 'x86_fu_reserve' returns 0. */
		lat = x86_fu_reserve(uop);
		if (!lat)
		{
			linked_list_next(iq);
			continue;
		}
		
		/* Instruction was issued to the corresponding fu.
		 * Remove it from IQ */
		x86_iq_remove(core, thread);
		
		/* Schedule inst in Event Queue */
		assert(!uop->in_event_queue);
		assert(lat > 0);
		uop->issued = 1;
		uop->issue_when = x86_cpu->cycle;
		uop->when = x86_cpu->cycle + lat;
		x86_event_queue_insert(X86_CORE.event_queue, uop);
		
		/* Instruction issued */
		X86_CORE.issued[uop->uinst->opcode]++;
		X86_CORE.iq_reads++;
		X86_CORE.reg_file_int_reads += uop->ph_int_idep_count;
		X86_CORE.reg_file_fp_reads += uop->ph_fp_idep_count;
		X86_THREAD.issued[uop->uinst->opcode]++;
		X86_THREAD.iq_reads++;
		X86_THREAD.reg_file_int_reads += uop->ph_int_idep_count;
		X86_THREAD.reg_file_fp_reads += uop->ph_fp_idep_count;
		x86_cpu->issued[uop->uinst->opcode]++;
		quant--;

		/* Trace */
		x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
			uop->id_in_core, uop->core);
	}
	
	return quant;
}
コード例 #16
0
ファイル: isa.c プロジェクト: Peilong/multi2sim-4.0-hc
void evg_isa_write_task_commit(struct evg_work_item_t *work_item)
{
	struct linked_list_t *task_list = work_item->write_task_list;
	struct evg_wavefront_t *wavefront = work_item->wavefront;
	struct evg_work_group_t *work_group = work_item->work_group;

	struct evg_isa_write_task_t *wt;
	struct evg_inst_t *inst;

	/* Process first tasks of type:
	 *  - EVG_ISA_WRITE_TASK_WRITE_DEST
	 *  - EVG_ISA_WRITE_TASK_WRITE_LDS
	 */
	for (linked_list_head(task_list); !linked_list_is_end(task_list); )
	{

		/* Get task */
		wt = linked_list_get(task_list);
		assert(wt->work_item == work_item);
		inst = wt->inst;

		switch (wt->kind)
		{
		
		case EVG_ISA_WRITE_TASK_WRITE_DEST:
		{
			if (wt->write_mask)
				evg_isa_write_gpr(work_item, wt->gpr, wt->rel, wt->chan, wt->value);
			work_item->pv.elem[wt->inst->alu] = wt->value;

			/* Debug */
			if (evg_isa_debugging())
			{
				evg_isa_debug("  i%d:%s", work_item->id,
					map_value(&evg_pv_map, wt->inst->alu));
				if (wt->write_mask)
				{
					evg_isa_debug(",");
					evg_inst_dump_gpr(wt->gpr, wt->rel, wt->chan, 0,
						debug_file(evg_isa_debug_category));
				}
				evg_isa_debug("<=");
				gpu_isa_dest_value_dump(inst, &wt->value,
					debug_file(evg_isa_debug_category));
			}

			break;
		}

		case EVG_ISA_WRITE_TASK_WRITE_LDS:
		{
			struct mem_t *local_mem;
			union evg_reg_t lds_value;

			local_mem = work_group->local_mem;
			assert(local_mem);
			assert(wt->lds_value_size);
			mem_write(local_mem, wt->lds_addr, wt->lds_value_size, &wt->lds_value);

			/* Debug */
			lds_value.as_uint = wt->lds_value;
			evg_isa_debug("  i%d:LDS[0x%x]<=(%u,%gf) (%d bytes)", work_item->id, wt->lds_addr,
				lds_value.as_uint, lds_value.as_float, (int) wt->lds_value_size);
			break;
		}

		default:
			linked_list_next(task_list);
			continue;
		}

		/* Done with this task */
		repos_free_object(evg_isa_write_task_repos, wt);
		linked_list_remove(task_list);
	}

	/* Process PUSH_BEFORE, PRED_SET */
	for (linked_list_head(task_list); !linked_list_is_end(task_list); )
	{
		/* Get task */
		wt = linked_list_get(task_list);
		inst = wt->inst;

		/* Process */
		switch (wt->kind)
		{

		case EVG_ISA_WRITE_TASK_PUSH_BEFORE:
		{
			if (!wavefront->push_before_done)
				evg_wavefront_stack_push(wavefront);
			wavefront->push_before_done = 1;
			break;
		}

		case EVG_ISA_WRITE_TASK_SET_PRED:
		{
			int update_pred = EVG_ALU_WORD1_OP2.update_pred;
			int update_exec_mask = EVG_ALU_WORD1_OP2.update_exec_mask;

			assert(inst->info->fmt[1] == EVG_FMT_ALU_WORD1_OP2);
			if (update_pred)
				evg_work_item_set_pred(work_item, wt->cond);
			if (update_exec_mask)
				evg_work_item_set_active(work_item, wt->cond);

			/* Debug */
			if (debug_status(evg_isa_debug_category))
			{
				if (update_pred && update_exec_mask)
					evg_isa_debug("  i%d:act/pred<=%d", work_item->id, wt->cond);
				else if (update_pred)
					evg_isa_debug("  i%d:pred=%d", work_item->id, wt->cond);
				else if (update_exec_mask)
					evg_isa_debug("  i%d:pred=%d", work_item->id, wt->cond);
			}
			break;
		}

		default:
			abort();
		}
		
		/* Done with task */
		repos_free_object(evg_isa_write_task_repos, wt);
		linked_list_remove(task_list);
	}

	/* List should be empty */
	assert(!linked_list_count(task_list));
}
コード例 #17
0
ファイル: stg-issue.c プロジェクト: abhaykadam/vm
static int issue_lq(int core, int thread, int quant)
{
	struct linked_list_t *lq = THREAD.lq;
	struct uop_t *load;

	/* Debug */
	if (esim_debug_file)
		uop_lnlist_check_if_ready(lq);
	
	/* Process lq */
	linked_list_head(lq);
	while (!linked_list_is_end(lq) && quant)
	{
		/* Get element from load queue. If it is not ready, go to the next one */
		load = linked_list_get(lq);
		if (!load->ready && !rf_ready(load))
		{
			linked_list_next(lq);
			continue;
		}
		load->ready = 1;

		/* Check that memory system is accessible */
		if (!mod_can_access(THREAD.data_mod, load->phy_addr))
		{
			linked_list_next(lq);
			continue;
		}

		/* Remove from load queue */
		assert(load->uinst->opcode == x86_uinst_load);
		lq_remove(core, thread);

		/* Access memory system */
		mod_access(THREAD.data_mod, mod_entry_cpu, mod_access_read,
			load->phy_addr, NULL, CORE.eventq, load);

		/* The cache system will place the load at the head of the
		 * event queue when it is ready. For now, mark "in_eventq" to
		 * prevent the uop from being freed. */
		load->in_eventq = 1;
		load->issued = 1;
		load->issue_when = cpu->cycle;
		
		/* Instruction issued */
		CORE.issued[load->uinst->opcode]++;
		CORE.lsq_reads++;
		CORE.rf_int_reads += load->ph_int_idep_count;
		CORE.rf_fp_reads += load->ph_fp_idep_count;
		THREAD.issued[load->uinst->opcode]++;
		THREAD.lsq_reads++;
		THREAD.rf_int_reads += load->ph_int_idep_count;
		THREAD.rf_fp_reads += load->ph_fp_idep_count;
		cpu->issued[load->uinst->opcode]++;
		quant--;
		
		/* MMU statistics */
		if (*mmu_report_file_name)
			mmu_access_page(load->phy_addr, mmu_access_read);

		/* Debug */
		esim_debug("uop action=\"update\", core=%d, seq=%llu,"
			" stg_issue=1, in_lsq=0, issued=1\n",
			load->core, (long long unsigned) load->di_seq);
	}
	
	return quant;
}
コード例 #18
0
ファイル: acc.c プロジェクト: Peilong/multi2sim-4.0-hc
static int x86_acc_func_accDTW (struct x86_ctx_t *ctx)
{
	struct x86_regs_t *regs = ctx->regs;
	struct mem_t *mem = ctx->mem;
	int core; 
	int thread;
	
	unsigned int args_ptr;
	//int x;
	struct arglist func_args;
	/* Read arguments */
	args_ptr = regs->ecx;


	/* Get function info */
	mem_read(mem, args_ptr, sizeof(arglist), &func_args);
	printf("\t\t**sample1 = %p (%p)\n", func_args.sample1, &(func_args.sample1[0][0]));
	printf("\t\tlength1      = %u (%p)\n", func_args.length1, &func_args.length1);
	printf("\t\t**sample2 = %p (%p)\n", func_args.sample2, &(func_args.sample2[0][0]));
	printf("\t\tlength2      = %u (%p)\n", func_args.length2, &func_args.length2);
	printf("\t\ti               = %u (%p)\n", func_args.i, &func_args.i);
	printf("\t\tj               = %u (%p)\n", func_args.j, &func_args.j);
	printf("\t\t*table       = %p (%p)\n", func_args.table, &(func_args.table[0]));

	/***********************************************/

#define L2ONLY
#define WITHACC


#ifdef L2ONLY
	printf ("Cache Behavior Simulation\n");
	char * mod_name = "mod-l2-0";
	X86_THREAD.data_mod = mem_system_get_mod (mod_name);
#endif

#ifdef WITHACC
	struct linked_list_t *sq = X86_THREAD.sq;
	struct linked_list_t *lq = X86_THREAD.lq;
	struct x86_uop_t *store;
	struct x86_uop_t *load;
	int quant = x86_cpu_issue_width;
	unsigned int count1, count2;

	for (count1 = 0; count1 < 124; count1 ++)
	{
		for(count2 = 0; count2 < 124; count2++)
		{
			linked_list_head(sq);
			while (!linked_list_is_end(sq)&& quant )
			{
				store = linked_list_get(sq);
				printf("\n\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n\n");
				printf("physical addr @ store: %d\n",store->phy_addr);
				assert(store->uinst->opcode == x86_uinst_store);
				if (!store->ready && !x86_reg_file_ready(store))
				{
					linked_list_next(sq);
					continue;
				}


				store->ready = 1;
				printf ("data module kind: %d\n",X86_THREAD.data_mod->kind);
				printf ("data module level: %d\n",X86_THREAD.data_mod->level);
				printf ("data module name: %s\n",X86_THREAD.data_mod->name);
				printf ("data module cache name: %s\n",X86_THREAD.data_mod->cache->name);
				if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
				{
					linked_list_next(sq);
					continue;
				}
		
				int i = 3;
				while (i--)
				{
					printf("Store Debug Point 6\n");
					mod_access(X86_THREAD.data_mod, mod_access_store,
						store->phy_addr, NULL, X86_CORE.event_queue, store);
				}
				quant--;
		
				// MMU statistics
				if (*mmu_report_file_name)
					mmu_access_page(store->phy_addr, mmu_access_write);

			}

			quant = x86_cpu_issue_width;

			//printf("Load Simulation ... \n");	
			linked_list_head(lq);
			while (!linked_list_is_end(lq)&& quant )
			{
				load = linked_list_get(lq);
				printf ("physical add @ load: %d\n",load->phy_addr);
				assert(store->uinst->opcode == x86_uinst_store);
				load->ready = 1;
				if (!load->ready && !x86_reg_file_ready(load))
				{
					printf("load debug point 1\n");
					linked_list_next(sq);
					continue;
				}


				load->ready = 1;
				//printf ("physical add: %d\n",load->phy_addr);
				if (!mod_can_access(X86_THREAD.data_mod, store->phy_addr))
				{
					printf("load debug point 2\n");
					linked_list_next(lq);
					continue;
				}
		
				int j = 1;
				while (j--)
				{
					printf("load debug point 3\n");
					mod_access(X86_THREAD.data_mod, mod_access_load,
						load->phy_addr, NULL, X86_CORE.event_queue, load);
				}
				quant--;

				//printf("load debug point 4: quant = %d\n", quant);

				// MMU statistics
				if (*mmu_report_file_name)
					mmu_access_page(load->phy_addr, mmu_access_read);

				// Trace
				x86_trace("x86.inst id=%lld core=%d stg=\"i\"\n",
					load->id_in_core, load->core);
			}
		}
	}
#endif

#ifdef L2ONLY
	mod_name = "mod-dl1-0";
	X86_THREAD.data_mod = mem_system_get_mod (mod_name);
#endif

	/***********************************************/
	int ret = DTWdistance(func_args.sample1, func_args.length1, func_args.sample2, 
		func_args.length2, func_args.i, func_args.j, func_args.table);
	return ret;

}