Example #1
void FE_stage()
{
  /* only part of FE_stage function is implemented */ 
  /* please complete the rest of FE_stage function */ 

  if(stop_fetching)
  	return;

  if(FE_latch->op_valid || FE_latch->pipeline_stall_enabled)
  {
	  /* Data inside the latch is valid and next stage is still using it.
	     Or ID stage has enabled pipeline stalling because of a branch instruction.
	     Do not fetch */
	  return;
  }

  Op *op = get_free_op();
  bool next_op_exists = get_op(op);
  if(!next_op_exists)			/* end of trace reached in FE: send a sentinel op down the pipeline */
  {
	free_op(op);
	op = get_free_op();
	op->is_last_instruction = true;
	stop_fetching = true;
  }

  /* hwsim : get the instruction and pass to ID phase */
  FE_latch->op = op;				/* pass the op to ID stage */
  FE_latch->op_valid = true;			/* Mark it as valid */
  FE_latch->pipeline_stall_enabled = false;	/* Disable pipeline stall, if any */

  //   next_pc = pc + op->inst_size;  // you need this code for building a branch predictor 

}
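The commented-out next_pc line above hints at where a branch predictor would hook into FE_stage. A minimal sketch, offered only as an assumption; the bpred_access/bpred_update signatures and the op fields (instruction_addr, cf_type, actually_taken, mispredicted_branch) are taken from the later examples, not from this one:

  /* Hypothetical sketch only -- interface assumed from Example #2 */
  if(use_bpred && (op->cf_type == CF_CBR))
  {
	bool predicted_dir = bpred_access(branchpred, op->instruction_addr);
	if(predicted_dir != op->actually_taken)
		op->mispredicted_branch = true;		/* recovery/stall is left to the later stages */
	bpred_update(branchpred, op->instruction_addr, predicted_dir, op->actually_taken);
  }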
Example #2
void WB_stage()
{
  /* You MUST call free_op function here after an op is retired */ 
  if(!MEM_latch->op_valid)
	  return;

  Op *op = MEM_latch->op;
  if((op->dst >= 0) && (op->dst < NUM_REG))
  {
	/* Mark the destination register as not busy */
	register_file[op->dst].busy = false;
  }
  /* If it is a branch instruction, the branch target is now available.
     Tell FE_stage to fetch the next instruction */
  if((op->cf_type >= CF_BR) && (op->cf_type < NUM_CF_TYPES))
  {
	FE_latch->pipeline_stall_enabled = false;
  }

  if(op->is_last_instruction)
  {
	  /* Sentinel op marking the end of the trace: end the simulation and
	     back out its retirement count (incremented again below) and the extra cycle */
	  sim_end_condition = true;
	  retired_instruction--;
	  cycle_count--;
  }
  
  retired_instruction++;
  MEM_latch->op_valid = false;
  free_op(op);
}
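WB_stage above clears FE_latch->pipeline_stall_enabled when a branch retires; the code that sets the stall lives in the ID stage, which is not shown in these examples. A possible counterpart, given purely as an assumption:

  /* Hypothetical ID_stage fragment: stall fetch while a branch is in flight,
     so the WB_stage code above can release it once the target is known. */
  if((op->cf_type >= CF_BR) && (op->cf_type < NUM_CF_TYPES))
	FE_latch->pipeline_stall_enabled = true;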
void FE_stage()
{
  /* only part of FE_stage function is implemented */ 
  /* please complete the rest of FE_stage function */ 
	
	
	if(FE_latch->stage_stall==false)	// fetch only when the pipeline is not stalled
	{
		Op* new_op = get_free_op();	//get a placeholder op out of the pool
		if(get_op(new_op))		//copy op from trace into the placeholder op
		{
			/* classify the fetched op for the instruction mix statistics */
			if((new_op->opcode>=OP_FMEM)&&(new_op->opcode<=OP_FCMO)){
				float_count++;
			}
			if((new_op->opcode>=OP_IADD)&&(new_op->opcode<=OP_MM)){
				integer_count++;
			}
			if((new_op->opcode>=OP_CF)&&(new_op->opcode<=OP_LDA)){
				if(new_op->opcode==OP_CF) branch_count++;				
				integer_count++;
			}
			if((new_op->opcode>=OP_LDA)&&(new_op->opcode<=OP_LD)){
				load_count++;
			}
			if((new_op->opcode==OP_MM)||(new_op->opcode==OP_IMUL)){
				multiple_count++;
			}

			if(new_op->opcode==OP_ST){
				store_count++;
			}			
			/* conditional branch: consult and train the branch predictor */
			if((new_op->opcode == OP_CF) && new_op->cf_type==CF_CBR && use_bpred==true)
			{
				uint64_t pc = new_op->instruction_addr;
				bool predicted_dir = bpred_access(branchpred,pc);
				bpred_update(branchpred,pc,predicted_dir,new_op->actually_taken);
				if(new_op->actually_taken != predicted_dir)
				{
					new_op->mispredicted_branch=true;
				}
				bpred_mispred_count=branchpred->mispred;
				bpred_okpred_count=branchpred->okpred;
			}

			FE_latch->op=new_op;
			FE_latch->op_valid=true;
		}
		else
			free_op(new_op);
	}
	
  //   next_pc = pc + op->inst_size;  // you need this code for building a branch predictor 

}
Example #4
t_op		*pop_op(t_op *ops, char *nick)
{
  t_op		*tmp;

  /* Walk the list until the op registered under 'nick' is found */
  while (ops != NULL && strcmp(ops->nick, nick) != 0)
    ops = ops->next;

  if (ops != NULL)
    {
      /* Unlink the matching element and release it */
      tmp = ops;
      ops = pop_element(ops, tmp);
      free_op(tmp);
    }

  return (first_op(ops));
}
void WB_stage()
{
  /* You MUST call free_op function here after an op is retired */ 
  /* you must complete the function */
	while(!MEM_latch->op_queue.empty())
	{
		Op *retire_op = MEM_latch->op_queue.front();
		
		if(retire_op->cf_type>=CF_BR && ((!use_bpred) || (use_bpred && (MEM_CONDITIONAL_BRANCH && MEM_MISPRED_BRANCH))))
		{
			FE_latch->stage_stall = false;
		}
		if((retire_op->dst!=-1) && (reg_writing_ops[retire_op->dst] == retire_op->inst_id))
			register_file[retire_op->dst].valid=true;
		retired_instruction++;
		MEM_latch->op_queue.pop_front();
		free_op(retire_op);
		
		//if(retire_op->inst_id == last_inst_id)
			//sim_end_condition = true;
	}
	MEM_latch->op_valid=false;		//not used
}
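The reg_writing_ops check above only makes sense if the decode stage records which op will write each register. A sketch of that bookkeeping, offered purely as an assumption (reg_writing_ops and register_file[].valid are the names used in the code above):

	/* Hypothetical ID_stage fragment: remember the youngest writer of each
	   destination register so an older op's writeback cannot clear a
	   younger op's pending write. */
	if(op->dst != -1)
	{
		register_file[op->dst].valid = false;
		reg_writing_ops[op->dst] = op->inst_id;
	}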
Example #6
void broadcast_rdy_op(Op* op)             // NEW-LAB2 
{                                          // NEW-LAB2 
	/* you must complete the function */     // NEW-LAB2 
	/* If the broadcast op is a dummy PTE op, do not pass it to the WB stage
	   to retire; install its translation in the TLB instead. */
	if(op->is_pteop)
	{
		uint64_t pfn = vmem_vpn_to_pfn(op->vpn, op->thread_id);
		tlb_install(dtlb,op->vpn,op->thread_id,pfn);
		free_op(op);
		pteop_returning_on_mshr_full = false;
		EX_latch->pipeline_stall_enabled = false;
		pteaddr_broadcasted = true;
		return;
	}
	// mem ops are done.  move the op into WB stage   // NEW-LAB2 
	WB_pending.push_back(op);
	WB_pending_at_cycle.push_back(cycle_count);
/*	MEM_latch->oplist.push_back(op);
	MEM_latch->op_valid = true;
*/
}      // NEW-LAB2 
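Here broadcast ops are parked in WB_pending / WB_pending_at_cycle instead of being pushed into MEM_latch directly; Example #8 only tests WB_pending.empty(), so the drain logic is not shown anywhere in these examples. A minimal sketch, assuming the two containers are parallel std::list/deque instances and that an op becomes visible to WB on a later cycle:

	/* Hypothetical WB-side drain -- the one-cycle delay and the container types are assumptions */
	while(!WB_pending.empty() && WB_pending_at_cycle.front() < cycle_count)
	{
		MEM_latch->oplist.push_back(WB_pending.front());
		MEM_latch->op_valid = true;
		WB_pending.pop_front();
		WB_pending_at_cycle.pop_front();
	}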
Example #7
void MEM_stage(memory_c *main_memory)  // please modify MEM_stage function argument  /** NEW-LAB2 */ 
{
	if((MEM_latch->op_valid) || (!EX_latch->op_valid))
		return;

	Op *op = EX_latch->op;
	int threadid = op->thread_id;		/* keep this 0 for LAB 3 */
	uint64_t effective_latency = dcache_latency - 1;

	if(!((op->mem_type > NOT_MEM) && (op->mem_type < NUM_MEM_TYPES)))	// Not a memory op
	{
			MEM_latch->op = op;		// deprecated
			MEM_latch->oplist.push_back(op);
			MEM_latch->op_valid = true;
			EX_latch->op_valid = false;
			return;
	}
	
	UINT64 ac_addr = (op->mem_type == MEM_ST) ? op->st_vaddr : op->ld_vaddr;
	if(pteaddr_broadcasted)
	{
		/* We have all translations now. Set up the op to do cache access
		   so that actual data is now loaded */
		uint64_t vpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
		uint64_t index = ac_addr % KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
		uint64_t pteaddr = vmem_get_pteaddr(vpn,threadid);
		uint64_t pfn = vmem_vpn_to_pfn(vpn,threadid);
		ac_addr = (pfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | index;
		if(op->mem_type == MEM_ST)
			op->st_vaddr = ac_addr;
		else if(op->mem_type == MEM_LD)
			op->ld_vaddr = ac_addr;
		EX_latch->pipeline_stall_enabled = false;
		pteaddr_broadcasted = false;
		goto dcache_access_for_data;
	}

	if(vmem_enabled && (op->mem_type > NOT_MEM) && (op->mem_type < NUM_MEM_TYPES))
	{
		/* Probe the TLB only to decide the dcache access latency;
		   the real TLB access (with stat updates) happens later. */
		uint64_t tvpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
		uint64_t tpfn;
		if(!tlb_access(dtlb,tvpn,threadid,&tpfn))
			effective_latency = 2 * (dcache_latency - 1);
	}

	/* If it is a memory instruction, wait for dcache latency cycles */
	if((op->mem_type > NOT_MEM) && (op->mem_type < NUM_MEM_TYPES))
	{
		if(op->is_waiting)	
		{
			if(cycle_count < op->wait_till_cycle)	// op should remain blocked for dcache access latency
				return;

			op->is_waiting = false;			// op completed wait for dcache access latency
		}
		else
		{
			if(!returning_on_mshr_full && !pteop_returning_on_mshr_full)
			{
				op->wait_till_cycle = cycle_count + effective_latency;	// new op - set a deadline
				op->is_waiting = true;					// order it to wait
				return;
			}
		}
	}
		
	uint64_t tvpn,tpfn,tpteaddr,tindex,tphysical_addr;

	if(pteop_returning_on_mshr_full)
	{
		tvpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
		tindex = ac_addr % KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
	 	tpteaddr = vmem_get_pteaddr(tvpn,threadid);
		bool b = tlb_access(dtlb,tvpn,threadid,&tpfn);
		if(b) 
		{
			dtlb_hit_count++;
			dtlb_hit_count_thread[threadid]++;
		}
		else
		{
			dtlb_miss_count++;
			dtlb_miss_count_thread[threadid]++;
		}
		if(b)
		{
			/* Got address translation in TLB */
			tphysical_addr = (tpfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | tindex;
			ac_addr = tphysical_addr;
			if(op->mem_type == MEM_ST)
				op->st_vaddr = ac_addr;
			else if(op->mem_type == MEM_LD)
				op->ld_vaddr = ac_addr;
			/* Remove the flag that indicates that insert_mshr failed */
			pteop_returning_on_mshr_full = false;
			/* Unblock the stall as it is not applicable any more */
			EX_latch->pipeline_stall_enabled = false;
			goto dcache_access_for_data;
		}
		else if(dcache_access(tpteaddr))
		{
			/* we got a cache hit on address translation */
			dcache_hit_count++;
			dcache_hit_count_thread[threadid]++;
			cache_update(data_cache,tpteaddr);
			/* we got the pfn from dcache.
			   Here, we get it using vpn_to_pfn translation function */
			tpfn = vmem_vpn_to_pfn(tvpn,threadid);
			tphysical_addr = (tpfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | tindex;
			tlb_install(dtlb,tvpn,threadid,tpfn);
			/* change the address accessed in cache as well as change mem request address
			   in case there is a cache miss */
			ac_addr = tphysical_addr;
			if(op->mem_type == MEM_ST)
				op->st_vaddr = ac_addr;
			else if(op->mem_type == MEM_LD)
				op->ld_vaddr = ac_addr;
			/* Remove the flag that indicates that insert_mshr failed */
			pteop_returning_on_mshr_full = false;
			/* Unblock the stall as it is not applicable any more */
			EX_latch->pipeline_stall_enabled = false;
			goto dcache_access_for_data; // add if needed
		}
		else
		{
			/* We got a cache miss for the address translation.
			   We will need to look up Page Table Entry in dram */
			dcache_miss_count++;
			dcache_miss_count_thread[threadid]++;
			/* We need to stall the pipeline as we want to make dram
			   access for PTE */
			EX_latch->pipeline_stall_enabled = true;
			/* We also need a dummy load op that will go into memory */
			Op * pteop = get_free_op();
			pteop->is_pteop = true;
			pteop->mem_type = MEM_LD;
			pteop->ld_vaddr = tpteaddr;
			pteop->mem_read_size = VMEM_PTE_SIZE;
			pteop->vpn = tvpn;
			if(main_memory->store_load_forwarding(pteop))
			{
				tpfn = vmem_vpn_to_pfn(tvpn,threadid);
				tphysical_addr = (tpfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | tindex;
				tlb_install(dtlb,tvpn,threadid,tpfn);	
				cache_update(data_cache,tpteaddr);
				ac_addr = tphysical_addr;
				if(op->mem_type == MEM_ST)
					op->st_vaddr = ac_addr;
				else if(op->mem_type == MEM_LD)
					op->ld_vaddr = ac_addr;
				/* Remove the flag that indicates that insert_mshr failed */
				pteop_returning_on_mshr_full = false;
				/* Unblock the stall as it is not applicable any more */
				EX_latch->pipeline_stall_enabled = false;
				goto dcache_access_for_data; // add if needed
			}
			else if(main_memory->check_piggyback(pteop))
			{
				pteop_returning_on_mshr_full = false;
				EX_latch->pipeline_stall_enabled = true;
				return;
			}
			else if(main_memory->insert_mshr(pteop))
			{
				pteop_returning_on_mshr_full = false;
				EX_latch->pipeline_stall_enabled = true;
				return;
			}
			else
			{
				dtlb_miss_count--;			/* undo; the miss is counted again when this access retries */
				dtlb_miss_count_thread[threadid]--;
				pteop_returning_on_mshr_full = true;
				EX_latch->pipeline_stall_enabled = true;
				free_op(pteop);
				return;
			}

		}
	}

	/* If we came back here because the MSHR was full on the first attempt to get
	   the translation, and the translation is still not available, do not execute
	   the rest of the function */
	if(EX_latch->pipeline_stall_enabled)
		return;

	/* Op has completed its wait for dcache latency amount of cycles */
	if(returning_on_mshr_full)
	{
		UINT64 ac_addr = (op->mem_type == MEM_ST) ? op->st_vaddr : op->ld_vaddr;
		if(dcache_access(ac_addr))
		{
			/* we got a cache hit - pass op to WB stage*/
			dcache_hit_count++;
			dcache_hit_count_thread[threadid]++;
			cache_update(data_cache,ac_addr);
			MEM_latch->op = op;		// deprecated
			MEM_latch->oplist.push_back(op);
			MEM_latch->op_valid = true;
			EX_latch->op_valid = false;	/* will help in handling Case #2 hit under miss */
			returning_on_mshr_full = false;		// XXX : check validity - added in lab 3
			return;
		}

		if(main_memory->insert_mshr(op))
		{
			/* added successfully into mshr */
			EX_latch->op_valid = false;
			returning_on_mshr_full = false;
			return;
		}
		else
		{
			returning_on_mshr_full = true;
			return;		// MSHR is full - wait for next cycle
		}
	}

	ac_addr = (op->mem_type == MEM_ST) ? op->st_vaddr : op->ld_vaddr;
	UINT64 physical_addr;
	uint64_t vpn,pfn,pteaddr,index;
	/* If virtual memory is enabled, access the TLB.  This is the real TLB access,
	   where we obtain the data and decide whether to access the dcache.
	   The cache is indexed with physical addresses from here on */
	if(vmem_enabled && (op->mem_type > NOT_MEM) && (op->mem_type < NUM_MEM_TYPES) && !pteop_returning_on_mshr_full)
	{
		vpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
		index = ac_addr % KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
		bool b = tlb_access(dtlb,vpn,threadid,&pfn);
		if(b)
		{
			dtlb_hit_count++;
			dtlb_hit_count_thread[threadid]++;
		}
		else
		{
			dtlb_miss_count++;
			dtlb_miss_count_thread[threadid]++;
		}
		if(b)
		{
			/* Got address translation in TLB */
			physical_addr = (pfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | index;
			/* change the address accessed in cache as well as change mem request address
			   in case there is a cache miss */
			ac_addr = physical_addr;
			if(op->mem_type == MEM_ST)
				op->st_vaddr = ac_addr;
			else if(op->mem_type == MEM_LD)
				op->ld_vaddr = ac_addr;
			EX_latch->pipeline_stall_enabled = false;
			/* No need to do anything else. Access dcache to get actual data 
			   GOTO dcache_access_for_data; // add if needed
			*/
		}
		else
		{
			/* We have a miss in TLB. Must access cache / page table in memory
			   to get the address translation */
			/* We have to get the PTE address first */
			vpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
			index = ac_addr % KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
			pteaddr = vmem_get_pteaddr(vpn,threadid);
			if(dcache_access(pteaddr))
			{
				/* we got a cache hit on address translation */
				dcache_hit_count++;
				dcache_hit_count_thread[threadid]++;
				cache_update(data_cache,pteaddr);
				/* we got the pfn from dcache.
				   Here, we get it using vpn_to_pfn translation function */
				pfn = vmem_vpn_to_pfn(vpn,threadid);
				physical_addr = (pfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | index;
				tlb_install(dtlb,vpn,threadid,pfn);
				/* change the address accessed in cache as well as change mem request address
				   in case there is a cache miss */
				ac_addr = physical_addr;
				if(op->mem_type == MEM_ST)
					op->st_vaddr = ac_addr;
				else if(op->mem_type == MEM_LD)
					op->ld_vaddr = ac_addr;
				EX_latch->pipeline_stall_enabled = false;
				/* No need to do anything else. Access dcache to get actual data 
				   GOTO dcache_access_for_data; // add if needed
				*/
			}
			else
			{
				/* We got a cache miss for the address translation.
				   We will need to look up Page Table Entry in dram */
				dcache_miss_count++;
				dcache_miss_count_thread[threadid]++;
				/* We also need a dummy load op that will go into memory */
				Op * pteop = get_free_op();
				pteop->is_pteop = true;
				pteop->mem_type = MEM_LD;
				pteop->ld_vaddr = pteaddr;
				pteop->mem_read_size = VMEM_PTE_SIZE;
				pteop->vpn = vpn;
				if(main_memory->store_load_forwarding(pteop))
				{
					/* we got MSHR hit on store load forwarding */
					pfn = vmem_vpn_to_pfn(vpn,threadid);
					physical_addr = (pfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | index;
					cache_update(data_cache,pteaddr);
					tlb_install(dtlb,vpn,threadid,pfn);
					ac_addr = physical_addr;
					if(op->mem_type == MEM_ST)
						op->st_vaddr = ac_addr;
					else if(op->mem_type == MEM_LD)
						op->ld_vaddr = ac_addr;
					EX_latch->pipeline_stall_enabled = false;
					/* No need to do anything else. Access dcache to get actual data 
					   GOTO dcache_access_for_data; // add if needed
					*/

				}
				else if(main_memory->check_piggyback(pteop))
				{
					/* We need to stall the pipeline as we want to make dram
					   access for PTE */
					EX_latch->pipeline_stall_enabled = true;
					return;
				}
				else if(main_memory->insert_mshr(pteop))
				{
					/* We need to stall the pipeline as we want to make dram
					   access for PTE */
					EX_latch->pipeline_stall_enabled = true;
					return;
				}
				else
				{
					EX_latch->pipeline_stall_enabled = true;
					pteop_returning_on_mshr_full = true;
					free_op(pteop);
					return;
				}

			}

		}
	}

	/* Check whether the data access hits in the dcache */

	dcache_access_for_data :

	if(dcache_access(ac_addr))
	{
		/* we got a cache hit - pass op to WB stage*/
		dcache_hit_count++;
		dcache_hit_count_thread[threadid]++;
		cache_update(data_cache,ac_addr);
		MEM_latch->op = op;		// deprecated
		MEM_latch->oplist.push_back(op);
		MEM_latch->op_valid = true;
		EX_latch->op_valid = false;	/* will help in handling Case #2 hit under miss */
		return;
	}
	
	/* We got a cache miss */
	dcache_miss_count++;
	dcache_miss_count_thread[threadid]++;
	/* Store Load Forwarding */
	if(main_memory->store_load_forwarding(op))
	{
		/* We got MSHR Hit in store load forwarding */
		store_load_forwarding_count++;
		store_load_forwarding_count_thread[threadid]++;
		MEM_latch->op = op;		// deprecated
		MEM_latch->oplist.push_back(op);
		MEM_latch->op_valid = true;
		EX_latch->op_valid = false;
		return;
	}
	/* Check if there is block hit for inst already present in MSHR - Case #4 MSHR HIT*/
	else if(main_memory->check_piggyback(op))
	{
		/* instruction piggybacked - allow EX to send next instruction */
		EX_latch->op_valid = false;
		return;
	}
	else
	{
		/* cache & MSHR miss - add into mshr */
		if(main_memory->insert_mshr(op))
		{
			/* added successfully into mshr */
			EX_latch->op_valid = false;
			returning_on_mshr_full = false;
			return;
		}
		else
		{
			returning_on_mshr_full = true;
			return;		// MSHR is full - wait for next cycle
		}
	}
	return;
}
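The virtual-to-physical translation above always uses the same arithmetic: divide by the page size for the VPN, take the remainder for the page offset, then recombine with the PFN. A small standalone illustration; the 4 KB page size and the addresses are made up for the example:

#include <cstdint>
#include <cinttypes>
#include <cstdio>

int main()
{
	const uint64_t page_size = 4096;             /* stand-in for KNOB(KNOB_VMEM_PAGE_SIZE) */
	uint64_t vaddr = 0x12345;                    /* arbitrary virtual address              */
	uint64_t vpn   = vaddr / page_size;          /* 0x12  - virtual page number            */
	uint64_t index = vaddr % page_size;          /* 0x345 - offset within the page         */
	uint64_t pfn   = 0x7;                        /* pretend vmem_vpn_to_pfn() returned 7   */
	uint64_t paddr = (pfn * page_size) | index;  /* 0x7345 - address handed to the dcache  */
	printf("vpn=%#" PRIx64 " index=%#" PRIx64 " paddr=%#" PRIx64 "\n", vpn, index, paddr);
	return 0;
}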
Example #8
void WB_stage(memory_c *main_memory)
{
  /* You MUST call free_op function here after an op is retired */
//  if(last_instruction_recvd && (main_memory->m_mshr.empty()) && (!MEM_latch->op_valid))

  if(!MEM_latch->op_valid)
	  return;

  if(MEM_latch->oplist.empty())
	  return;
  for(list<Op *>::iterator it = MEM_latch->oplist.begin() ; it != MEM_latch->oplist.end() ; it++)
  {
	  Op *op = *it;
	  if((op->dst >= 0) && (op->dst < NUM_REG))
	  {
		/* Mark the destination register as not busy */
		register_file[op->thread_id][op->dst].busy = false;
	  }
	  /* If it is a branch instruction, the branch target is now available.
	     Tell FE_stage to fetch the next instruction for this thread */
	  if((op->cf_type >= CF_BR) && (op->cf_type < NUM_CF_TYPES) && (FE_latch->stall_enforcer_thread[op->thread_id] == op->inst_id))
	  {
		FE_latch->pipeline_stall_enabled_thread[op->thread_id] = false;
		FE_latch->stall_enforcer_thread[op->thread_id] = 0;
	  }
	  
	  retired_instruction_thread[op->thread_id]++;

	  if(op->is_last_instruction)
	  {
		  /* Sentinel op: note its arrival and back out its retirement counts */
		  last_instruction_recvd = true;
		  retired_instruction--;
		  retired_instruction_thread[op->thread_id]--;
		  num_of_inst_with_last_inst = MEM_latch->oplist.size();
	  }
  	  free_op(op);
   }
  
  retired_instruction += MEM_latch->oplist.size();
  MEM_latch->op_valid = false;
  MEM_latch->oplist.clear();

  /* setting sim end condition */
  bool l_inst,dram,fe,id,ex,mem,wb;
  l_inst = last_instruction_recvd;
  dram = main_memory->m_mshr.empty();
  wb = WB_pending.empty();
  mem = !MEM_latch->op_valid;
  ex = !EX_latch->op_valid;
  id = !ID_latch->op_valid;
  fe = true;
  for(int i = 0 ; i < thread_count ; i++)
  {
	  if(FE_latch->op_valid_thread[i])
		  fe = false;
  }

//  if(last_instruction_recvd && (main_memory->m_mshr.empty()) && (!MEM_latch->op_valid) && (WB_pending.empty()))

  if(l_inst  &&  dram  &&  wb  &&  mem  &&  ex  &&  id  &&  fe)
  {
	  sim_end_condition = true;
	  if(num_of_inst_with_last_inst == 1)
		  cycle_count--;
	  return;
  }

}
Example #9
void FE_stage()
{

	if(stop_fetching)
		return;

	if(have_to_send_EOS)
	{
		if(sendEOS())
		{
			stop_fetching = true;
			have_to_send_EOS = false;
		}
		return;
	}

	#if 0
	if(FE_latch->op_valid || FE_latch->pipeline_stall_enabled)
	{
		/* Data inside the latch is valid and next stage is still using it.
		Or ID stage has enabled pipeline stalling because of a branch instruction.
		Do not fetch */
		return;
	}
	/* This condition is rewritten for multithreading. See following statements.
	~(a OR b) ===>  ~a AND ~b */
	#endif

	static UINT64 fetch_arbiter = 0;
	int stream_id = -1;
	Op *op;
	bool op_exists = false, stalled[HW_MAX_THREAD];

	for(int i = 0 ; i < HW_MAX_THREAD ; i++)
		stalled[i] = true;

	/* Find next available empty queue slot to fill */
	for(int i = 0 ; i < thread_count ; i++)
	{
		stream_id = fetch_arbiter++ % thread_count;
		if(!FE_latch->op_valid_thread[stream_id] && !FE_latch->pipeline_stall_enabled_thread[stream_id])
		{
			stalled[stream_id] = false;
			op = get_free_op();
			op_exists = get_op(op, stream_id);
			if(op_exists)
				break;
			else
				free_op(op);
		}
	}
	
	if(!op_exists)
	{
		/* No op fetched - this could be due to following : 
		   1. all threads were stalled
		   2. some threads were stalled and others have run out of instructions
		   3. no instructions available to fetch
		*/

		// checking case 1
		bool all_stalled = true;
		for(int i = 0 ; i < thread_count ; i++)
		{
			if(!stalled[i])
				all_stalled = false;
		}
		if(all_stalled)
			return;

		// checking case 2 & 3
		bool eois = true;	// end of instruction streams
		for(int i = 0 ; i < thread_count ; i++)
		{
			if(!end_of_stream[i])
				eois = false;
		}
		if(!eois)
			return;
		else
		{
			/* Must take actions for initiating simulator shut down */
			// first it should be seen if there is some space in queue.
			// if no, then try to send in next cycle
			if(sendEOS())
				stop_fetching = true;
			else
				have_to_send_EOS = true;
			return;
		}
	}

	/* If the op is a branch other than a conditional branch, assume it is predicted
	   correctly when the branch predictor is used */
	//  if(use_bpred && (op->cf_type >= CF_BR) && (op->cf_type < NUM_CF_TYPES) && (op->cf_type != CF_CBR))
	//	  bpred_okpred_count++;
	/* The two lines above are commented out because that is not how the solution is implemented */

	/* If we are using branch predictor and type of opcode is conditional branch,
	get a prediction and update GHR and PHT */
	if(use_bpred && (op->cf_type == CF_CBR))
	{
		int prediction = bpred_access(branchpred, op->instruction_addr, op->thread_id);
		if(prediction == op->actually_taken)
		{
			bpred_okpred_count++;
			bpred_okpred_count_thread[op->thread_id]++;
		}
		else
		{
			bpred_mispred_count++;
			bpred_mispred_count_thread[op->thread_id]++;
			/* stall the pipeline if we mispredict */
			FE_latch->pipeline_stall_enabled_thread[op->thread_id] = true;
			FE_latch->stall_enforcer_thread[op->thread_id] = op->inst_id;
		}
		bpred_update(branchpred,op->instruction_addr,prediction,op->actually_taken, op->thread_id);
	}

	/* hwsim : get the instruction and pass to ID phase */
	# if 0
	/* Deprecated  after adding MT support */
	FE_latch->op = op;				/* pass the op to ID stage */
	FE_latch->op_valid = true;			/* Mark it as valid */
	#endif

	FE_latch->op_queue[op->thread_id] = op;
	FE_latch->op_valid_thread[op->thread_id] = true;

}
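The fetch loop above relies on a static counter taken modulo thread_count for round-robin fairness across threads. A self-contained illustration of that arbitration pattern; the helper function and its names are invented for the example:

#include <cstdio>

static unsigned long fetch_arbiter = 0;

int pick_stream(int thread_count, const bool *fetchable)
{
	for(int i = 0; i < thread_count; i++)
	{
		int stream_id = (int)(fetch_arbiter++ % thread_count);
		if(fetchable[stream_id])
			return stream_id;	/* first eligible thread in the rotation wins */
	}
	return -1;				/* every thread was stalled this cycle */
}

int main()
{
	bool fetchable[3] = {true, false, true};
	for(int cycle = 0; cycle < 4; cycle++)
		printf("cycle %d fetches from thread %d\n", cycle, pick_stream(3, fetchable));
	return 0;
}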
void WB_stage()
{   
	Op *wb_op;
    
	if(theEnd && !MEM_latch->op_valid && last_op_in_queue==true)	// sim_end_condition
	{
		sim_end_condition=TRUE;
	}
	
    if(MEM_latch->op_valid) 
    { 
   
     wb_op=MEM_latch->op; // take the op out of MEM_latch
    /*  if(wb_op->last_op==TRUE && last_op_in_queue==true) // taking care of the sim_end_condition for the last instrn
       {
    	  sim_end_condition=TRUE;
       std::cout<<"sim end condition is true 2"<<endl;
       }  */
    
    
    if(wb_op->cf_type >= CF_BR) // a branch at the WB stage removes the control dependency from all preceding stages
	  cd_stall=FALSE;
 
    if((int)wb_op->dst==-1) // instruction doesn't write to register
     { 
      
      if(!wb_op->last_op)
         retired_instruction++;
      MEM_latch->op_valid=FALSE;
      free_op(wb_op);
     }	
    else
     { 
      register_file[(int)wb_op->dst].valid=TRUE; // to remove register(data) dependency at ID stage
      dd_stall=FALSE;  
      if(!wb_op->last_op)
           retired_instruction++;
      MEM_latch->op_valid=FALSE;
      free_op(wb_op);
     }
    }
   
  
  // Also retire instructions arriving from the DRAM queue
    
  while(!mem_op_queue.empty())
    { 
	  wb_op = mem_op_queue.front();
	  if(wb_op->cf_type >= CF_BR) // a branch at the WB stage removes the control dependency from all preceding stages
	  	  cd_stall=FALSE;
	  
	  if((int)wb_op->dst==-1) // instruction doesn't write to register
	       {
		  
		  if(!wb_op->last_op)
		       retired_instruction++;
	      free_op(wb_op);
	       }	
	  else
	       {  
	        register_file[(int)wb_op->dst].valid=TRUE; // to remove register(data) dependency at ID stage
	        dd_stall=FALSE;  
	        if(!wb_op->last_op)
	            retired_instruction++;
	        free_op(wb_op);
	       }
	  
	  mem_op_queue.pop_front();
	  
    }  
}