/* MEM_stage: data-memory pipeline stage with virtual-memory support (LAB 3).
 *
 * Consumes the op held in EX_latch and either forwards it to MEM_latch
 * (non-memory ops and d-cache hits) or hands it to main_memory through the
 * MSHR (d-cache misses).  When vmem_enabled, a load/store first needs a
 * VA->PA translation: TLB hit, else a d-cache lookup of the PTE address,
 * else a dummy "pteop" load sent toward DRAM while the pipeline is stalled
 * via EX_latch->pipeline_stall_enabled.
 *
 * Cross-cycle state lives in globals rather than locals:
 *   - op->is_waiting / op->wait_till_cycle  model d-cache access latency;
 *   - returning_on_mshr_full                retry data access, MSHR was full;
 *   - pteop_returning_on_mshr_full          retry PTE access, MSHR was full;
 *   - pteaddr_broadcasted                   DRAM finished the PTE fetch.
 *
 * Fixes vs. previous revision: removed dead local 'tvaddr' (latency-probe
 * section), removed dead local 'pteaddr' (pteaddr_broadcasted section,
 * keeping the call defensively), removed unreachable trailing 'return;'.
 */
void MEM_stage(memory_c *main_memory) // please modify MEM_stage function argument /** NEW-LAB2 */
{
    /* Nothing to do if WB still holds an op, or EX has nothing for us. */
    if((MEM_latch->op_valid) || (!EX_latch->op_valid))
        return;

    Op *op = EX_latch->op;
    int threadid = op->thread_id; /* keep this 0 for LAB 3 */

    /* Cycles this op must wait to model d-cache access latency; doubled
       below when a TLB miss will force a second cache access. */
    uint64_t effective_latency = dcache_latency - 1;

    if(!((op->mem_type > NOT_MEM) && (op->mem_type < NUM_MEM_TYPES))) // Not a memory op
    {
        /* Non-memory op: pass straight through to the WB stage. */
        MEM_latch->op = op; // deprecated
        MEM_latch->oplist.push_back(op);
        MEM_latch->op_valid = true;
        EX_latch->op_valid = false;
        return;
    }

    /* Address this memory op accesses (still virtual at this point). */
    UINT64 ac_addr = (op->mem_type == MEM_ST) ? op->st_vaddr : op->ld_vaddr;

    if(pteaddr_broadcasted)
    {
        /* We have all translations now (DRAM broadcast the PTE).  Rewrite
           the op's address to the physical address and go access the
           d-cache for the actual data. */
        uint64_t vpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
        uint64_t index = ac_addr % KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
        /* FIX: result was stored in a dead local; keep the call in case
           vmem_get_pteaddr lazily materializes the PTE — TODO confirm it
           is pure, then delete this line. */
        (void)vmem_get_pteaddr(vpn, threadid);
        uint64_t pfn = vmem_vpn_to_pfn(vpn, threadid);
        ac_addr = (pfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | index;
        if(op->mem_type == MEM_ST)
            op->st_vaddr = ac_addr;
        else if(op->mem_type == MEM_LD)
            op->ld_vaddr = ac_addr;
        EX_latch->pipeline_stall_enabled = false;
        pteaddr_broadcasted = false;
        goto dcache_access_for_data;
    }

    if(vmem_enabled && (op->mem_type > NOT_MEM) && (op->mem_type < NUM_MEM_TYPES))
    {
        /* Probe the TLB only to size the latency: a TLB miss costs a second
           d-cache access.  This is not the real (stat-counting) TLB access;
           that is done later.  (FIX: removed dead local 'tvaddr' that was
           computed here and never read.) */
        uint64_t tvpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
        uint64_t tpfn;
        if(!tlb_access(dtlb, tvpn, threadid, &tpfn))
            effective_latency = 2 * (dcache_latency - 1);
    }

    /* If it is a memory instruction, wait for dcache latency cycles. */
    if((op->mem_type > NOT_MEM) && (op->mem_type < NUM_MEM_TYPES))
    {
        if(op->is_waiting)
        {
            if(cycle_count < op->wait_till_cycle) // op should remain blocked for dcache access latency
                return;
            op->is_waiting = false;               // op completed wait for dcache access latency
        }
        else
        {
            /* Retrying ops (MSHR was full) already paid their latency. */
            if(!returning_on_mshr_full && !pteop_returning_on_mshr_full)
            {
                op->wait_till_cycle = cycle_count + effective_latency; // new op - set a deadline
                op->is_waiting = true;                                 // order it to wait
                return;
            }
        }
    }

    uint64_t tvpn, tpfn, tpteaddr, tindex, tphysical_addr;
    if(pteop_returning_on_mshr_full)
    {
        /* Re-attempt the PTE fetch whose MSHR insert failed last time. */
        tvpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
        tindex = ac_addr % KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
        tpteaddr = vmem_get_pteaddr(tvpn, threadid);
        bool b = tlb_access(dtlb, tvpn, threadid, &tpfn);
        if(b)
        {
            dtlb_hit_count++;
            dtlb_hit_count_thread[threadid]++;
        }
        else
        {
            dtlb_miss_count++;
            dtlb_miss_count_thread[threadid]++;
        }
        if(b)
        {
            /* Got address translation in TLB. */
            tphysical_addr = (tpfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | tindex;
            ac_addr = tphysical_addr;
            if(op->mem_type == MEM_ST)
                op->st_vaddr = ac_addr;
            else if(op->mem_type == MEM_LD)
                op->ld_vaddr = ac_addr;
            /* Remove the flag that indicates that insert_mshr failed. */
            pteop_returning_on_mshr_full = false;
            /* Unblock the stall as it is not applicable any more. */
            EX_latch->pipeline_stall_enabled = false;
            goto dcache_access_for_data;
        }
        else if(dcache_access(tpteaddr))
        {
            /* We got a cache hit on the address translation. */
            dcache_hit_count++;
            dcache_hit_count_thread[threadid]++;
            cache_update(data_cache, tpteaddr);
            /* We got the pfn from dcache.  Here, we get it using the
               vpn_to_pfn translation function. */
            tpfn = vmem_vpn_to_pfn(tvpn, threadid);
            tphysical_addr = (tpfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | tindex;
            tlb_install(dtlb, tvpn, threadid, tpfn);
            /* Change the address accessed in cache as well as the mem
               request address in case there is a cache miss. */
            ac_addr = tphysical_addr;
            if(op->mem_type == MEM_ST)
                op->st_vaddr = ac_addr;
            else if(op->mem_type == MEM_LD)
                op->ld_vaddr = ac_addr;
            /* Remove the flag that indicates that insert_mshr failed. */
            pteop_returning_on_mshr_full = false;
            /* Unblock the stall as it is not applicable any more. */
            EX_latch->pipeline_stall_enabled = false;
            goto dcache_access_for_data; // add if needed
        }
        else
        {
            /* Cache miss for the address translation: look up the Page
               Table Entry in DRAM. */
            dcache_miss_count++;
            dcache_miss_count_thread[threadid]++;
            /* Stall the pipeline while the PTE DRAM access is in flight. */
            EX_latch->pipeline_stall_enabled = true;
            /* Build a dummy load op that will go into memory. */
            Op *pteop = get_free_op();
            pteop->is_pteop = true;
            pteop->mem_type = MEM_LD;
            pteop->ld_vaddr = tpteaddr;
            pteop->mem_read_size = VMEM_PTE_SIZE;
            pteop->vpn = tvpn;
            if(main_memory->store_load_forwarding(pteop))
            {
                /* PTE satisfied by an in-flight store: translate locally. */
                tpfn = vmem_vpn_to_pfn(tvpn, threadid);
                tphysical_addr = (tpfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | tindex;
                tlb_install(dtlb, tvpn, threadid, tpfn);
                cache_update(data_cache, tpteaddr);
                ac_addr = tphysical_addr;
                if(op->mem_type == MEM_ST)
                    op->st_vaddr = ac_addr;
                else if(op->mem_type == MEM_LD)
                    op->ld_vaddr = ac_addr;
                /* Remove the flag that indicates that insert_mshr failed. */
                pteop_returning_on_mshr_full = false;
                /* Unblock the stall as it is not applicable any more. */
                EX_latch->pipeline_stall_enabled = false;
                goto dcache_access_for_data; // add if needed
            }
            else if(main_memory->check_piggyback(pteop))
            {
                /* PTE request merged onto an existing MSHR entry. */
                pteop_returning_on_mshr_full = false;
                EX_latch->pipeline_stall_enabled = true;
                return;
            }
            else if(main_memory->insert_mshr(pteop))
            {
                pteop_returning_on_mshr_full = false;
                EX_latch->pipeline_stall_enabled = true;
                return;
            }
            else
            {
                /* MSHR full again: undo this attempt's miss stats and
                   retry next cycle. */
                dtlb_miss_count--;
                dtlb_miss_count_thread[threadid]--;
                pteop_returning_on_mshr_full = true;
                EX_latch->pipeline_stall_enabled = true;
                free_op(pteop);
                return;
            }
        }
    }

    /* If we came back here because the MSHR was full during the first
       attempt at getting the translation, and the translation is not yet
       available, do not execute the rest of the function. */
    if(EX_latch->pipeline_stall_enabled)
        return;

    /* Op has completed its wait for dcache latency amount of cycles. */
    if(returning_on_mshr_full)
    {
        /* Retry the data access whose MSHR insert failed last time.
           Address was already translated on the first attempt. */
        UINT64 ac_addr = (op->mem_type == MEM_ST) ? op->st_vaddr : op->ld_vaddr;
        if(dcache_access(ac_addr))
        {
            /* Cache hit - pass op to WB stage. */
            dcache_hit_count++;
            dcache_hit_count_thread[threadid]++;
            cache_update(data_cache, ac_addr);
            MEM_latch->op = op; // deprecated
            MEM_latch->oplist.push_back(op);
            MEM_latch->op_valid = true;
            EX_latch->op_valid = false; /* will help in handling Case #2 hit under miss */
            returning_on_mshr_full = false; // XXX : check validity - added in lab 3
            return;
        }
        if(main_memory->insert_mshr(op))
        {
            /* Added successfully into MSHR. */
            EX_latch->op_valid = false;
            returning_on_mshr_full = false;
            return;
        }
        else
        {
            returning_on_mshr_full = true;
            return; // MSHR is full - wait for next cycle
        }
    }

    ac_addr = (op->mem_type == MEM_ST) ? op->st_vaddr : op->ld_vaddr;
    UINT64 physical_addr;
    uint64_t vpn, pfn, pteaddr, index;

    /* If we are using virtual memory, access the TLB.  This is the real
       TLB access where we get the translation and decide whether to
       access the d-cache.  The cache is indexed using physical addresses
       from here on. */
    if(vmem_enabled && (op->mem_type > NOT_MEM) && (op->mem_type < NUM_MEM_TYPES) && !pteop_returning_on_mshr_full)
    {
        vpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
        index = ac_addr % KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
        bool b = tlb_access(dtlb, vpn, threadid, &pfn);
        if(b)
        {
            dtlb_hit_count++;
            dtlb_hit_count_thread[threadid]++;
        }
        else
        {
            dtlb_miss_count++;
            dtlb_miss_count_thread[threadid]++;
        }
        if(b)
        {
            /* Got address translation in TLB. */
            physical_addr = (pfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | index;
            /* Change the address accessed in cache as well as the mem
               request address in case there is a cache miss. */
            ac_addr = physical_addr;
            if(op->mem_type == MEM_ST)
                op->st_vaddr = ac_addr;
            else if(op->mem_type == MEM_LD)
                op->ld_vaddr = ac_addr;
            EX_latch->pipeline_stall_enabled = false;
            /* No need to do anything else; control falls through to
               dcache_access_for_data below. */
        }
        else
        {
            /* TLB miss: must access cache / page table in memory to get
               the address translation.  Get the PTE address first. */
            vpn = ac_addr / KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
            index = ac_addr % KNOB(KNOB_VMEM_PAGE_SIZE)->getValue();
            pteaddr = vmem_get_pteaddr(vpn, threadid);
            if(dcache_access(pteaddr))
            {
                /* Cache hit on the address translation. */
                dcache_hit_count++;
                dcache_hit_count_thread[threadid]++;
                cache_update(data_cache, pteaddr);
                /* We got the pfn from dcache.  Here, we get it using the
                   vpn_to_pfn translation function. */
                pfn = vmem_vpn_to_pfn(vpn, threadid);
                physical_addr = (pfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | index;
                tlb_install(dtlb, vpn, threadid, pfn);
                /* Change the address accessed in cache as well as the mem
                   request address in case there is a cache miss. */
                ac_addr = physical_addr;
                if(op->mem_type == MEM_ST)
                    op->st_vaddr = ac_addr;
                else if(op->mem_type == MEM_LD)
                    op->ld_vaddr = ac_addr;
                EX_latch->pipeline_stall_enabled = false;
                /* Fall through to dcache_access_for_data below. */
            }
            else
            {
                /* Cache miss for the address translation: look up the
                   Page Table Entry in DRAM. */
                dcache_miss_count++;
                dcache_miss_count_thread[threadid]++;
                /* Build a dummy load op that will go into memory. */
                Op *pteop = get_free_op();
                pteop->is_pteop = true;
                pteop->mem_type = MEM_LD;
                pteop->ld_vaddr = pteaddr;
                pteop->mem_read_size = VMEM_PTE_SIZE;
                pteop->vpn = vpn;
                if(main_memory->store_load_forwarding(pteop))
                {
                    /* MSHR hit via store-load forwarding. */
                    pfn = vmem_vpn_to_pfn(vpn, threadid);
                    physical_addr = (pfn * KNOB(KNOB_VMEM_PAGE_SIZE)->getValue()) | index;
                    cache_update(data_cache, pteaddr);
                    tlb_install(dtlb, vpn, threadid, pfn);
                    ac_addr = physical_addr;
                    if(op->mem_type == MEM_ST)
                        op->st_vaddr = ac_addr;
                    else if(op->mem_type == MEM_LD)
                        op->ld_vaddr = ac_addr;
                    EX_latch->pipeline_stall_enabled = false;
                    /* Fall through to dcache_access_for_data below. */
                }
                else if(main_memory->check_piggyback(pteop))
                {
                    /* Stall the pipeline: DRAM access for PTE in flight. */
                    EX_latch->pipeline_stall_enabled = true;
                    return;
                }
                else if(main_memory->insert_mshr(pteop))
                {
                    /* Stall the pipeline: DRAM access for PTE in flight. */
                    EX_latch->pipeline_stall_enabled = true;
                    return;
                }
                else
                {
                    /* MSHR full: remember to retry the PTE fetch. */
                    EX_latch->pipeline_stall_enabled = true;
                    pteop_returning_on_mshr_full = true;
                    free_op(pteop);
                    return;
                }
            }
        }
    }

/* Access the d-cache for the actual data (address is physical when
   vmem_enabled). */
dcache_access_for_data:
    if(dcache_access(ac_addr))
    {
        /* Cache hit - pass op to WB stage. */
        dcache_hit_count++;
        dcache_hit_count_thread[threadid]++;
        cache_update(data_cache, ac_addr);
        MEM_latch->op = op; // deprecated
        MEM_latch->oplist.push_back(op);
        MEM_latch->op_valid = true;
        EX_latch->op_valid = false; /* will help in handling Case #2 hit under miss */
        return;
    }

    /* We got a cache miss. */
    dcache_miss_count++;
    dcache_miss_count_thread[threadid]++;

    /* Store-Load Forwarding. */
    if(main_memory->store_load_forwarding(op))
    {
        /* MSHR hit in store-load forwarding. */
        store_load_forwarding_count++;
        store_load_forwarding_count_thread[threadid]++;
        MEM_latch->op = op; // deprecated
        MEM_latch->oplist.push_back(op);
        MEM_latch->op_valid = true;
        EX_latch->op_valid = false;
        return;
    }
    /* Check if there is a block hit for an inst already present in the
       MSHR - Case #4 MSHR HIT. */
    else if(main_memory->check_piggyback(op))
    {
        /* Instruction piggybacked - allow EX to send next instruction. */
        EX_latch->op_valid = false;
        return;
    }
    else
    {
        /* Cache & MSHR miss - add into MSHR. */
        if(main_memory->insert_mshr(op))
        {
            /* Added successfully into MSHR. */
            EX_latch->op_valid = false;
            returning_on_mshr_full = false;
            return;
        }
        else
        {
            returning_on_mshr_full = true;
            return; // MSHR is full - wait for next cycle
        }
    }
}
/* MEM_stage: memory pipeline stage (alternative implementation using
 * function-local static state instead of globals).
 *
 * NOTE(review): this file also contains another MEM_stage definition with
 * the same signature above; only one can be compiled into a build —
 * presumably selected via the preprocessor or build setup. Confirm.
 *
 * Non-memory ops retire immediately.  Memory ops first (if vmem_enabled)
 * get a VA->PA translation via the d-TLB / d-cache / a dummy PTE load
 * through the MSHR, then perform the d-cache access, falling back to
 * store-load/store-store forwarding, piggybacking, or an MSHR insert.
 * Thread id is hard-coded to 0 in all TLB/vmem calls.
 */
void MEM_stage(memory_c *main_memory) // please modify MEM_stage function argument /** NEW-LAB2 */
{
    /* you must complete the function */
    /* Cross-cycle state: these statics survive between invocations and
       implement the multi-cycle behavior of this stage. */
    static int latency_count;                       // remaining d-cache latency cycles (counts down)
    static bool mshr_full = false;                  // last MSHR insert failed; retry path
    static bool address_translation_done = false;   // VA->PA rewrite already applied to the op
    static bool tlb_previously_missed = false;      // suppress double-counting a TLB miss on retry
    static bool pte_load_in_progress = false;       // dummy PTE load sitting in the MSHR
    int flag;                                       // 0 = load, 1 = store (set in the vmem path)
    if(EX_latch->op_valid==true)
    {
        /********* LAB 3 ************/
        if((EX_latch->op)->mem_type>=1)   // memory op (mem_type enum: nonzero means LD/ST)
        {
            // If virtual memory enabled but translation not done
            if(vmem_enabled == true && address_translation_done == false)
            {
                uint64_t vaddr, phyFNum, phy_addr, virPNum;
                if((EX_latch->op)->mem_type == MEM_LD){
                    flag=0;
                    vaddr = (EX_latch->op)->ld_vaddr;
                }
                else{
                    flag=1;
                    vaddr = (EX_latch->op)->st_vaddr;
                }
                virPNum = (vaddr) / vmem_page_size;   // virtual page number
                if(tlb_access(dtlb,virPNum,0,&phyFNum) == true) //if a TLB hit
                {
                    /* Only count the hit if this is not a retry after a
                       previously-counted miss. */
                    if(tlb_previously_missed == false)
                        dtlb_hit_count++;
                    else
                        tlb_previously_missed = false;
                    uint64_t page_offset = vaddr % vmem_page_size;
                    phy_addr=((phyFNum)*vmem_page_size) + page_offset;
                    //cout << (EX_latch->op)->inst_id << " " << phy_addr << endl;
                    /* Rewrite the op's address in place to the physical one. */
                    if((EX_latch->op)->mem_type == MEM_LD)
                        (EX_latch->op)->ld_vaddr = phy_addr;
                    else
                        (EX_latch->op)->st_vaddr = phy_addr;
                    address_translation_done = true;
                    EX_latch->stage_stall = false; //remove any stalls due to TLB
                }
                else //if TLB miss
                {
                    if(tlb_previously_missed == false)
                    {
                        dtlb_miss_count++;
                        tlb_previously_missed=true;
                    }
                    uint64_t pteaddr = vmem_get_pteaddr(virPNum,0);
                    EX_latch->stage_stall=true; //if you get a TLB miss, stall pipeline for sure
                    if(pte_load_in_progress == false) //if pte request not put into MSHR, check in dcache
                    {
                        if (mshr_full == false)
                        {
                            /* Model d-cache latency with a countdown; the
                               access happens on the cycle it reaches 0. */
                            if(latency_count == 0)
                            {
                                latency_count=KNOB(KNOB_DCACHE_LATENCY)->getValue();
                            }
                            latency_count--;
                            if(latency_count == 0)
                            {
                                //if(flag==1) dcache_read_count++;
                                //if(flag==0) dcache_write_count++;
                                if(dcache_access(pteaddr))
                                {
                                    /* NOTE(review): flag==1 is a store but
                                       bumps dcache_read_count (and vice
                                       versa) — looks swapped; confirm the
                                       intended stat semantics. */
                                    if(flag==1)
                                        dcache_read_count++;
                                    if(flag==0)
                                        dcache_write_count++;
                                    dcache_hit_count++;
                                    /* PTE found in cache: install the
                                       translation into the TLB. */
                                    uint64_t pfnum = vmem_vpn_to_pfn(virPNum,0);
                                    tlb_install(dtlb,virPNum,0,pfnum);
                                }
                                else
                                {
                                    /* NOTE(review): a PTE read miss bumps
                                       dcache_write_count unconditionally —
                                       possibly counting the fill as a
                                       write; confirm. */
                                    dcache_write_count++;
                                    //cout << "tlb cache miss" <<endl;
                                    dcache_miss_count++;
                                    /* Reuse the shared dummy_op as the PTE
                                       load sent to memory. */
                                    dummy_op->valid = true;
                                    dummy_op->mem_type = MEM_LD;
                                    dummy_op->ld_vaddr = pteaddr;
                                    dummy_op->mem_read_size = 4; //some non-zero value
                                    dummy_op->opcode = OP_LD_PTE; //to distinguish a LD PTE op
                                    if(main_memory->insert_mshr(dummy_op) == true)
                                    {
                                        //cout << "dummy_op_insert" << endl;
                                        pte_load_in_progress=true;
                                        pte_load_done=false;
                                        mshr_full = false;
                                    }
                                    else
                                    {
                                        EX_latch->stage_stall=true; //if mshr is full
                                        mshr_full = true;
                                    }
                                }
                            }
                        }
                        else
                        {
                            /* MSHR was full last cycle: retry the insert.
                               NOTE(review): the comment mentions
                               piggybacking but no check_piggyback call is
                               made on this retry path — confirm. */
                            if(main_memory->insert_mshr(dummy_op)==true) //if cannot be piggybacked, try inserting a new entry
                            {
                                pte_load_in_progress=true;
                                pte_load_done=false;
                                mshr_full = false;
                            }
                            else
                            {
                                EX_latch->stage_stall=true; //if mshr is full
                                mshr_full = true;
                            }
                        }
                    }
                    else
                    {
                        /* PTE load already in flight: wait for the memory
                           system to set pte_load_done, then install the
                           translation. */
                        if(pte_load_done)
                        {
                            uint64_t pfnum = vmem_vpn_to_pfn(virPNum,0);
                            tlb_install(dtlb,virPNum,0,pfnum);
                            pte_load_done = false;
                            pte_load_in_progress = false;
                        }
                    }
                }
            }
            // If virtual memory enabled & translation complete OR virtual memory disabled
            if((vmem_enabled == true && address_translation_done == true) || (vmem_enabled==false))
            {
                /* Do everything*/
                if((EX_latch->op)->mem_type==MEM_LD)
                {
                    if(mshr_full == false)
                    {
                        /* Start/continue the d-cache latency countdown. */
                        if(latency_count == 0)
                        {
                            latency_count=KNOB(KNOB_DCACHE_LATENCY)->getValue();
                            EX_latch->stage_stall=true;
                        }
                        latency_count--;
                        if(latency_count == 0)
                        {
                            if(dcache_access((EX_latch->op)->ld_vaddr))
                            {
                                dcache_read_count++;
                                address_translation_done = false; //reset the address_translation_done flag
                                dcache_hit_count++;
                                EX_latch->op_valid=false;
                                EX_latch->stage_stall=false;
                                fill_retire_queue(EX_latch->op); //not really a broadcast, just using available function
                            }
                            else
                            {
                                /* NOTE(review): a load miss increments
                                   dcache_write_count — possibly counting
                                   the line fill as a write; confirm. */
                                dcache_write_count++;
                                //cout << "op cache access" << endl;
                                dcache_miss_count++;
                                if(main_memory->store_load_forwarding(EX_latch->op)) //check if store load fwding is possible
                                {
                                    //cout << "store-load " << EX_latch->op->inst_id << endl;
                                    address_translation_done = false; //reset the address_translation_done flag
                                    store_load_forwarding_count++;
                                    EX_latch->op_valid=false;
                                    EX_latch->stage_stall=false;
                                    fill_retire_queue(EX_latch->op); //not really a broadcast, just using available function
                                }
                                else if(main_memory->check_piggyback(EX_latch->op) == false)
                                {
                                    if(main_memory->insert_mshr(EX_latch->op)==true) //if cannot be piggybacked, try inserting a new entry
                                    {
                                        //cout << "op_insert" << endl;
                                        address_translation_done = false; //reset the address_translation_done flag
                                        EX_latch->op_valid=false;
                                        EX_latch->stage_stall=false;
                                        mshr_full = false;
                                    }
                                    else
                                    {
                                        EX_latch->stage_stall=true; //if mshr is full
                                        mshr_full = true;
                                    }
                                }
                                else
                                {
                                    /* Piggybacked onto an existing MSHR
                                       entry: free the latch slot. */
                                    address_translation_done = false; //reset the address_translation_done flag
                                    EX_latch->op_valid=false;
                                    EX_latch->stage_stall=false;
                                }
                            }
                        }
                    }
                    else
                    {
                        /* Retry after MSHR-full (latency already paid). */
                        if(main_memory->insert_mshr(EX_latch->op)==true) //if cannot be piggybacked, try inserting a new entry
                        {
                            address_translation_done = false; //reset the address_translation_done flag
                            EX_latch->op_valid=false;
                            EX_latch->stage_stall=false;
                            mshr_full = false;
                        }
                        else
                        {
                            EX_latch->stage_stall=true; //if mshr is full
                            mshr_full = true;
                        }
                    }
                }
                else if((EX_latch->op)->mem_type==MEM_ST)
                {
                    if(mshr_full == false)
                    {
                        /* Start/continue the d-cache latency countdown. */
                        if(latency_count == 0)
                        {
                            latency_count=KNOB(KNOB_DCACHE_LATENCY)->getValue();
                            EX_latch->stage_stall=true;
                        }
                        latency_count--;
                        if(latency_count == 0)
                        {
                            dcache_write_count++;
                            if(dcache_access((EX_latch->op)->st_vaddr))
                            {
                                address_translation_done = false; //reset the address_translation_done flag
                                dcache_hit_count++;
                                EX_latch->op_valid=false;
                                EX_latch->stage_stall=false;
                                fill_retire_queue(EX_latch->op); //not really a broadcast, just using available function
                            }
                            else
                            {
                                dcache_miss_count++;
                                if(main_memory->store_store_forwarding(EX_latch->op)) //check if store load fwding is possible
                                {
                                    address_translation_done = false; //reset the address_translation_done flag
                                    store_store_forwarding_count++;
                                    EX_latch->op_valid=false;
                                    EX_latch->stage_stall=false;
                                    fill_retire_queue(EX_latch->op); //not really a broadcast, just using available function
                                }
                                else if(main_memory->check_piggyback(EX_latch->op) == false)
                                {
                                    if(main_memory->insert_mshr(EX_latch->op)==true) //if cannot be piggybacked, try inserting a new entry
                                    {
                                        address_translation_done = false; //reset the address_translation_done flag
                                        EX_latch->op_valid=false;
                                        EX_latch->stage_stall=false;
                                        mshr_full = false;
                                    }
                                    else
                                    {
                                        EX_latch->stage_stall=true; //if mshr is full
                                        mshr_full = true;
                                    }
                                }
                                else
                                {
                                    /* Piggybacked: undo the write count
                                       bumped above for this attempt. */
                                    dcache_write_count--;
                                    address_translation_done = false; //reset the address_translation_done flag
                                    EX_latch->op_valid=false;
                                    EX_latch->stage_stall=false;
                                }
                            }
                        }
                    }
                    else
                    {
                        /* Retry after MSHR-full (latency already paid). */
                        if(main_memory->insert_mshr(EX_latch->op)==true) //if cannot be piggybacked, try inserting a new entry
                        {
                            address_translation_done = false; //reset the address_translation_done flag
                            EX_latch->op_valid=false;
                            EX_latch->stage_stall=false;
                            mshr_full = false;
                        }
                        else
                        {
                            EX_latch->stage_stall=true; //if mshr is full
                            mshr_full = true;
                        }
                    }
                }
            }
        }
        else
        {
            /* Non-memory op: retire immediately. */
            EX_latch->op_valid=false;
            EX_latch->stage_stall=false;
            fill_retire_queue(EX_latch->op); //not really a broadcast, just using available function
        }
    }
}