int l1p_init()
{
    // Restore A2 hardware thread priority
    ThreadPriority_Medium();

    if(PhysicalThreadID() != 0)
        return 0;

    // Restore prefetcher state
    CoreState_t* cs = GetCoreStateByCore(PhysicalProcessorID());
    if(cs->default_l1p_init)
    {
        out64((void *)L1P_CFG_SPEC,   cs->default_l1p_cfgspec);
        out64((void *)L1P_CFG_PF_USR, cs->default_l1p_cfgpfusr);
        out64((void *)L1P_CFG_PF_SYS, cs->default_l1p_cfgpfsys);
        ppc_msync();
    }
    else
    {
        ppc_msync();
        cs->default_l1p_cfgspec  = in64((void*)L1P_CFG_SPEC);
        cs->default_l1p_cfgpfusr = in64((void*)L1P_CFG_PF_USR);
        cs->default_l1p_cfgpfsys = in64((void*)L1P_CFG_PF_SYS);
        cs->default_l1p_init = 1;
        ppc_msync();
    }
    return 0;
}
short LeaderLatch2(UPC_Barrier_t *pLock, volatile uint64_t *pstatus, volatile uint64_t *pstatus2)
{
    uint64_t curLock;

    // check if any other thread has the lock yet.
    curLock = LoadReserved( pLock );
    while (curLock == 0)
    {
        // quit if both status words are inactive
        if ((*pstatus == 0) && (*pstatus2 == 0))
        {
            return LLATCH_CONTINUE;
        }
        // otherwise store this thread's index +1 as the lock value.
        curLock = Kernel_PhysicalHWThreadIndex();
        curLock += 1;
        if ( StoreConditional( pLock, curLock ))
        {
            ppc_msync();   // export mem sync
            return LLATCH_LEADER;
        }
        // try again.
        curLock = LoadReserved( pLock );
    }

    // otherwise block on curLock
    uint64_t savpri = Set_ThreadPriority_Low();
    while ( *pLock )
    {
        asm volatile ("nop; nop; nop; nop;");
    }
    Restore_ThreadPriority(savpri);
    ppc_msync();   // export mem sync for everyone else
    return LLATCH_CONTINUE;
}
static inline void createSendGIPulseThread(int numloops)
{
    int loop;
    for(loop=0; loop<numloops; loop++)
    {
        mtspr(SPRN_TENC, 1 << ProcessorThreadID());
        isync();
        MUSPI_GISend(&GI);
        ppc_msync();
        MUSPI_GISendClear(&GI);
        ppc_msync();
    }
    mtspr(SPRN_TENC, 1 << ProcessorThreadID());
    isync();
}
void UPC_Lock(UPC_Lock_t *pLock)
{
    uint64_t lockIndex = Upci_GetLockID();
    uint32_t curValue;

    //mbar();
    do {
        do {
            curValue = LoadReserved32( pLock );
            if (curValue == 0) {
                break;   // lock is free - try to take it
            }
            else if (curValue == lockIndex) {
                UPC_FATAL_ERR("Duplicate UPC_Lock() by thread lockIndex=%ld\n", lockIndex);
                // Terminate(lockIndex);  // terminate test
            }
            else {
                uint64_t savpri = Set_ThreadPriority_Low();
                while ( *pLock ) {   // spin till free
                    asm volatile ("nop; nop; nop; nop;");
                }
                Restore_ThreadPriority(savpri);
            }
        } while(1);
    } while (!StoreConditional32(pLock, lockIndex));
    // isync();  // create import barrier (lock seen before subsequent storage accesses)
    ppc_msync();
}
/*!
 * \brief Initializes speculation registers before the start of a job.
 */
int Speculation_Init()
{
    int slice;
    uint64_t scrub_rate;
    L2C_SPECID_t specid;

    if(TI_isDD1() || ((GetPersonality()->Kernel_Config.NodeConfig & PERS_ENABLE_DD1_Workarounds) != 0))
    {
    }
    else
    {
        SPEC_SetNumberOfDomains(1);
        SPEC_SetPrivMap( L2C_PRIVMAP_DISABLEWRITEFNC(L2C_PRIVMAP_FUNC_NUMDOM) |
                         L2C_PRIVMAP_DISABLEWRITEFNC(L2C_PRIVMAP_FUNC_PRIVMAP) );
        ppc_msync();
        for(specid=0; specid<128; specid++)
        {
            SPEC_TryChangeState_priv(specid, L2C_IDSTATE_PRED_SPEC | L2C_IDSTATE_INVAL);
            SPEC_SetConflict_priv(specid, 0);
        }
        ppc_msync();
    }
    App_GetEnvValue("BG_SIMPLEROLLBACK", &SIMPLE_ROLLBACK);
    L2_AtomicStore(&SpecDomainsAllocated, 0);
    domainsConfigured = 0;

    // Reset the L2 scrub rate
    scrub_rate = 64;
    for(slice=0; slice<L2_DCR_num; slice++)
    {
        // Set the L2 scrub rate
        uint64_t l2_dcr_refctrl = DCRReadPriv(L2_DCR(slice, REFCTRL));
        if(default_l2_first_init)
            default_l2_scrub_rate[slice] = L2_DCR__REFCTRL__SCB_INTERVAL_get(l2_dcr_refctrl);
        L2_DCR__REFCTRL__SCB_INTERVAL_insert(l2_dcr_refctrl, default_l2_scrub_rate[slice]);
        DCRWritePriv(L2_DCR(slice, REFCTRL), l2_dcr_refctrl);
    }
    default_l2_first_init = 1;

    Speculation_ExitJailMode();
    return 0;
}
void LeaderUnLatch(UPC_Barrier_t *pLock)
{
    ppc_msync();   // import mem sync
    // atomically clear the lock word to release the latch.
    do {
        LoadReserved( pLock );
    } while (!StoreConditional( pLock, 0 ));
}
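/*
 * Usage sketch (illustrative, not from the original source): a typical
 * leader-election pattern built from LeaderLatch2()/LeaderUnLatch().  Exactly
 * one caller wins the latch and performs the shared work; the others spin at
 * low priority inside LeaderLatch2() until the leader unlatches.  The two
 * status words and collect_counters() are hypothetical.
 */
#if 0
static UPC_Barrier_t latch = 0;
static volatile uint64_t status1 = 1, status2 = 1;   // nonzero == still active

void sample_region(void)
{
    if (LeaderLatch2(&latch, &status1, &status2) == LLATCH_LEADER) {
        collect_counters();      // hypothetical work done once per region
        LeaderUnLatch(&latch);   // releases the threads blocked in LeaderLatch2()
    }
}
#endif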
// Assumes the target number of threads is consistent across all threads, so no one thread needs to be the master.
short UPC_Barrier(UPC_Barrier_t *pLock, short num_threads, uint64_t timeout)
{
    uint64_t targ_num_threads = num_threads;
    uint64_t barr_timeout;

    // Timeout hardcoded in cycles
    if (timeout == 0)
        timeout = 200000;
    barr_timeout = GetTimeBase() + timeout;

    ppc_msync();   // export mem sync

    // store initial value
    uint64_t curValue;
    curValue = LoadReserved( pLock );
    while (curValue == 0)
    {
        curValue = targ_num_threads;
        StoreConditional(pLock, curValue);   // let 1st writer win - so don't care if it fails.
        curValue = LoadReserved( pLock );
    }

    // now atomically subtract 1 for this thread.
    do {
        curValue = LoadReserved( pLock );
        curValue--;
    } while (!StoreConditional( pLock, curValue));

    // now wait till the value reaches zero
    uint64_t savpri = Set_ThreadPriority_Low();
    while (*pLock > 0)
    {
        if (GetTimeBase() > barr_timeout)
        {
            UPC_CRITICAL_WARNING("Timeout(2) on barr_target of 0x%016lx; cur target=0x%016lx.\n",
                                 targ_num_threads, curValue);
            break;   // end barrier
        }
    }
    Restore_ThreadPriority(savpri);
    ppc_msync();   // import mem sync
    return (*pLock);
}
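/*
 * Usage sketch (illustrative, not from the original source): every
 * participating hardware thread calls UPC_Barrier() with the same thread
 * count.  A return value of 0 means all threads arrived; a nonzero return is
 * the count still outstanding when the cycle-based timeout expired.
 * sync_word and sync_point() are hypothetical.
 */
#if 0
static UPC_Barrier_t sync_word = 0;

void sync_point(void)
{
    short remaining = UPC_Barrier(&sync_word, 4, 0);   // 4 threads, default timeout
    if (remaining != 0) {
        // hypothetical recovery: at least one thread never arrived
    }
}
#endif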
void UPC_Unlock(UPC_Lock_t *pLock)
{
    ppc_msync();   // export sync - prior stores must complete before the lock is dropped.
    uint32_t lockIndex = (uint32_t)Upci_GetLockID();
    uint32_t curValue = *pLock;
    if (curValue != lockIndex)
    {
        UPC_FATAL_ERR("Improper UPC_Unlock for lockIndex=%d; curIndex=%d\n", lockIndex, curValue);
        // Terminate(lockIndex);  // terminate test
    }
    else
    {
        *pLock = 0;
    }
    // mbar();
}
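/*
 * Usage sketch (illustrative, not from the original source): the lock word
 * holds the owner's nonzero lock ID, which is how UPC_Lock() detects a
 * duplicate acquisition and UPC_Unlock() detects a mismatched release.
 * cfg_lock and update_shared_config() are hypothetical.
 */
#if 0
static UPC_Lock_t cfg_lock = 0;

void locked_update(void)
{
    UPC_Lock(&cfg_lock);       // spins at low thread priority until free
    update_shared_config();    // hypothetical work done under the lock
    UPC_Unlock(&cfg_lock);     // fatal error if we are not the owner
}
#endif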
void Futex_Interrupt( KThread_t *pKThr )
{
    int isShared = pKThr->FutexIsShared;   // create a local copy of the shared indicator
    uint64_t my_turn = Lock_AtomicAcquire(isShared);
    Futex_State_t* futexTableEntry = Futex_findTableEntry(pKThr->FutexVAddr, isShared, 0);
    if (futexTableEntry != NULL)
    {
        if (futexTableEntry->pKThr_Waiter_Next == pKThr)
        {
            futexTableEntry->pKThr_Waiter_Next = pKThr->FutexQueueNext;
            // If there are no waiters, free the entry in the futex table.
            if (futexTableEntry->pKThr_Waiter_Next == NULL)
            {
                futexTableEntry->futex_vaddr = 0;
            }
        }
        else
        {
            KThread_t *FtQ = futexTableEntry->pKThr_Waiter_Next;
            while ((FtQ != NULL) && (FtQ->FutexQueueNext != pKThr))
            {
                FtQ = FtQ->FutexQueueNext;
            }
            if (FtQ != NULL)
            {
                FtQ->FutexQueueNext = pKThr->FutexQueueNext;
            }
        }
    }
    pKThr->Reg_State.cr |= CR_ERROR;   // syscall failed
    pKThr->Reg_State.gpr[3] = EINTR;   // this is the result of the futex syscall
    pKThr->FutexQueueNext = (KThread_t *)0;
    pKThr->FutexVAddr = NULL;
    pKThr->FutexValue = 0;
    pKThr->FutexTimeout = 0;
    pKThr->FutexIsShared = 0;
    //pKThr->pad3 = 2;  // TEMP PROBLEM ANALYSIS
    ppc_msync();
    Lock_AtomicRelease(isShared, my_turn);
    Sched_Unblock(pKThr, SCHED_STATE_FUTEX);
}
int Speculation_CleanupJob()
{
    SPEC_SetSpeculationIDSelf_priv(0x400);   // clear interrupt state, clear lower 9 bits as well

    // Restore the default system-call and standard-interrupt code sequences.
    // See Speculation_EnableFastSpeculationPath() for commentary on this
    // process.  In this case we need an IPI only for the "system" core,
    // because this routine is called on every application hardware thread.
    uint64_t ici_needed = 0;
    Kernel_Lock(&FastPathsLock);
    if (FastPathsEnabled)
    {
        extern uint32_t Vector_EI_trampoline;
        extern uint32_t Vector_SC_trampoline;
        uint64_t exceptionVector = mfspr(SPRN_IVPR);
        *((uint32_t *) (exceptionVector + IVO_EI)) = Vector_EI_trampoline;
        *((uint32_t *) (exceptionVector + IVO_SC)) = Vector_SC_trampoline;
        ppc_msync();   // make sure the stores have taken effect
        FastPathsEnabled = 0;
        ici_needed = 1;   // we can't hold the lock while sending IPI's
    }
    Kernel_Unlock(&FastPathsLock);

    // Flush the icache whether or not we're the thread that did the patching.
    // We only need to do this from one thread on each core.
    if (ProcessorThreadID() == 0)
    {
        isync();
        ici();
    }
    if (ici_needed)
    {
        // We still need an IPI for the "system" core.
        IPI_invalidate_icache(NodeState.NumCoresEnabled - 1);
        Kernel_WriteFlightLog(FLIGHTLOG_high, FL_SPCFEPDIS, 0,0,0,0);
    }

    // bqcbugs 1620
    l2_set_overlock_threshold(0);
    l2_set_spec_threshold(0);
    l2_set_prefetch_enables(1);
    // --
    return 0;
}
int Futex_Timeout(KThread_t* thread)
{
    KThread_t *FtQ;
    Futex_State_t* futexTableEntry = Futex_findTableEntry(thread->FutexVAddr, thread->FutexIsShared, 0);
    if (futexTableEntry != NULL)
    {
        if (futexTableEntry->pKThr_Waiter_Next == thread)
        {
            futexTableEntry->pKThr_Waiter_Next = thread->FutexQueueNext;
            // If we have no waiters on this futex address, we must remove it from the table.
            if (futexTableEntry->pKThr_Waiter_Next == 0)
            {
                futexTableEntry->futex_vaddr = 0;
            }
        }
        else
        {
            FtQ = futexTableEntry->pKThr_Waiter_Next;
            while ((FtQ != NULL) && (FtQ->FutexQueueNext != thread))
            {
                FtQ = FtQ->FutexQueueNext;
            }
            if (FtQ != NULL)
            {
                FtQ->FutexQueueNext = thread->FutexQueueNext;
            }
        }
    }
    thread->Reg_State.cr |= CR_ERROR;       // syscall failed
    thread->Reg_State.gpr[3] = ETIMEDOUT;   // this is the result of the futex syscall
    thread->FutexQueueNext = (KThread_t *)0;
    thread->FutexVAddr = NULL;
    thread->FutexValue = 0;
    thread->FutexTimeout = 0;
    thread->FutexIsShared = 0;
    //thread->pad3 = 3;  // TEMP PROBLEM ANALYSIS
    ppc_msync();
    Sched_Unblock( thread, SCHED_STATE_FUTEX );
    return 0;
}
/*!
 * \brief Allocates a speculative domain.
 * \note Each additional domain potentially decreases the number of speculative IDs assigned to each domain.
 * \note Domains must have all speculative IDs set to the available state.
 */
int Speculation_AllocateDomain(unsigned int* domain)
{
#if 0
    const unsigned char domainmap[17] = { 1, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16};
    if(NodeState.NumSpecDomains >= 16)
    {
        return ENOMEM;
    }
    if(!SPEC_AllAvailOrInvalid())
    {
        // If any ID is speculative or committed, we can not switch.
        // \todo This needs to be made safe:
        //   While switching, we can not allow threads to allocate concurrently (race).
        //   TM domains need to be made aware of changes to adapt their allocation mask.
        //   In short, all speculation RTS need to be shut down temporarily while changing the number of domains.
        return ENOMEM;
    }
    NodeState.NumSpecDomains++;
    SPEC_SetNumberOfDomains( domainmap[NodeState.NumSpecDomains] );
    SPEC_SetDomainMode_priv(NodeState.NumSpecDomains-1, L2C_DOMAINATTR_MODE_STM);
    ppc_msync();
    /* \todo Initialize commit, alloc, reclaim pointers??? */
    *domain = NodeState.NumSpecDomains-1;
#endif
    uint32_t domainAllocated = L2_AtomicLoadIncrement(&SpecDomainsAllocated);
    if(domainAllocated >= SPEC_GetNumberOfDomains())
    {
        return ENOMEM;
    }
    // bqcbugs 1620.
    l2_set_prefetch_enables(0);
    l2_unlock_all_with_address((void *) 0x200000);
    l2_set_overlock_threshold(0xA);   // set L2 overlock and spec thresholds
    l2_set_spec_threshold(0xA);
    // --
    Kernel_WriteFlightLog(FLIGHTLOG_high, FL_SPCALCDOM, domainAllocated,0,0,0);
    *domain = domainAllocated;
    return 0;
}
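/*
 * Usage sketch (illustrative, not from the original source): callers obtain a
 * domain index and treat ENOMEM as "no speculative domains left".
 * start_speculative_region() is hypothetical.
 */
#if 0
void start_speculative_region(void)
{
    unsigned int dom;
    if (Speculation_AllocateDomain(&dom) == ENOMEM) {
        // hypothetical fallback: run the region non-speculatively
    } else {
        // configure and run using speculative domain 'dom'
    }
}
#endif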
int Speculation_EnterJailMode(bool longRunningSpec)
{
    AppProcess_t* process = GetMyProcess();
    if (process != GetProcessByProcessorID(ProcessorID()))
    {
        Speculation_Restart(SPEC_GetSpeculationIDSelf_priv(), Kernel_SpecReturnCode_INVALID, &GetMyKThread()->Reg_State);
        return Kernel_SpecReturnCode_INVALID;
    }
    if(longRunningSpec)
    {
        uint64_t SpecPID;
        uint32_t ProcessOvercommit = 64 / GetMyAppState()->Active_Processes;
        if(ProcessOvercommit > 4)
            ProcessOvercommit = 4;
        vmm_getSpecPID(process->Tcoord, ProcessorThreadID() % ProcessOvercommit, &SpecPID);
        if(SpecPID)
        {
            mtspr(SPRN_PID, SpecPID);
            isync();
            // The A2 does not reliably notify the L1p of a dci.
#if 0
            volatile uint64_t* pf_sys_p = (volatile uint64_t*)(SPEC_GetL1PBase_priv() + L1P_CFG_PF_SYS - L1P_ESR);
            uint64_t pf_sys = *pf_sys_p;
            *pf_sys_p = pf_sys |  L1P_CFG_PF_SYS_pf_invalidate_all;
            *pf_sys_p = pf_sys & ~L1P_CFG_PF_SYS_pf_invalidate_all;
            dci();
#else
            asm volatile ("dci 2");
#endif
            ppc_msync();
        }
        else
        {
            Speculation_Restart(SPEC_GetSpeculationIDSelf_priv(), Kernel_SpecReturnCode_INVALID, &GetMyKThread()->Reg_State);
            return Kernel_SpecReturnCode_INVALID;
        }
    }
int UPC_Lock_woBlock(UPC_Lock_t *pLock)
{
    uint64_t lockIndex = Upci_GetLockID();
    uint32_t curValue;
    int rc = -1;

    //ppc_msync();
    curValue = LoadReserved32( pLock );
    while ((curValue == 0) && (!StoreConditional32(pLock, lockIndex)))
    {
        curValue = LoadReserved32( pLock );
    }
    if (curValue == 0)
    {
        // got the lock
        rc = 0;
        ppc_msync();   // import mem sync - subsequent loads only occur after lock successful.
    }
    else if (curValue == lockIndex)
    {
        UPC_FATAL_ERR("Duplicate UPC_Lock() by thread lockIndex=%ld\n", lockIndex);
        // Terminate(lockIndex);  // terminate test
    }
    return rc;
}
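/*
 * Usage sketch (illustrative, not from the original source): the non-blocking
 * variant returns 0 when the lock was taken and -1 when another thread holds
 * it, so callers can skip the protected work rather than spin.
 * stats_lock and record_sample() are hypothetical.
 */
#if 0
static UPC_Lock_t stats_lock = 0;

void try_record(void)
{
    if (UPC_Lock_woBlock(&stats_lock) == 0) {
        record_sample();            // hypothetical work under the lock
        UPC_Unlock(&stats_lock);
    }
    // else: lock busy; drop this sample and try again later
}
#endif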
/*=================================================================*/
void HPM_Init_t(int numthreads)
{
    int i, j, k, core;
    // int threads_per_core;
    // int * eventSet;
    char * ptr;
    unsigned int tid, pid, cid;
    unsigned int lock_status;
    int rc;
    // Upci_Mode_t Mode;

    tid = PhysicalThreadID();    // between 0 and 3
    pid = PhysicalThreadIndex(); // between 0 and 67
    cid = pid/4;

    if (pid == 0)
    {
        // set the initial cumulative counter values to zero
        for (k=0; k<MAX_CORES; k++)
            for (j=0; j<MAX_CODE_BLOCKS; j++)
                for (i=0; i<MAX_COUNTERS; i++)
                    counter_sum[k][j][i] = 0LL;

        for (j=0; j<MAX_CODE_BLOCKS; j++)
            timebase_sum[j] = 0LL;

        for (j=0; j<MAX_CODE_BLOCKS; j++)
            for (i=0; i<6; i++)
                L2_sum[j][i] = 0LL;

        // keep track of code block starts and stops
        for (j=0; j<MAX_CODE_BLOCKS; j++)
        {
            block_starts[j] = 0;
            block_stops[j] = 0;
        }

        // set mask used for thread and core aggregation
        for (i=0; i<MAX_EVENTS; i++)
            mask[i] = 0;

        // check env variables
        // fixme
        ptr = fwext_getenv("HPM_GROUP");
        if (ptr == NULL) {
            hpm_group = 0;
        }
        else
            hpm_group = hpm_atoi(ptr);
        // printf("hpm_group = %d\n", hpm_group);
        // hpm_group = 82;
        if (hpm_group < -1) hpm_group = 0;
        if (hpm_group > 99) hpm_group = 0;

        // fixme
        // ptr = fwext_getenv("HPM_SCOPE");
        if (pid != 0) return;
        // if (ptr != NULL) {
        //     if (strncasecmp(ptr,"process", 7) == 0) process_scope = 1;
        //     if (strncasecmp(ptr,"node", 4) == 0) node_scope = 1;
        // }

        // fixme
        // ptr = fwext_getenv("HPM_METRICS");
        // if (ptr != NULL) {
        //     if (strncasecmp(ptr,"yes", 3) == 0) derived_metrics = 1;
        // }

        for (i=0; i<MAX_CORES; i++) coremask[i] = 1;

        // find the number of cores used by this process
        // fixme
        // numcores = 0;
        // for (i=0; i<MAX_CORES; i++) numcores += coremask[i];
        numcores = 17;

        // determine the number of threads per core
        // numthreads = BgGetNumThreads();
        // numthreads = 68;
        // threads_per_core = numthreads / numcores;
        // hpm_threads = threads_per_core;
        // fixme
        hpm_threads = 4;

        // optionally reset the number of threads per core that will be counted
        // fixme
        // ptr = fwext_getenv("HPM_THREADS");
        // if (ptr != NULL) {
        //     hpm_threads = fwext_atoi(ptr);
        //     if (hpm_threads < 1) hpm_threads = 1;
        //     if (hpm_threads > 4) hpm_threads = 4;
        // }

        // set num_events and num_counters based on hpm_group and hpm_threads
        switch (hpm_group) {
        case -1: num_events = 6;  eventSet = exptSet; break;
        case  0: num_events = 6;  eventSet = mySet;   break;
        case  1: num_events = 12; if (hpm_threads > 2) hpm_threads = 2; eventSet = ldSet;  break;
        case  2: num_events = 24; if (hpm_threads > 1) hpm_threads = 1; eventSet = fpuSet; break;
        case  3: num_events = 12; if (hpm_threads > 2) hpm_threads = 2; eventSet = fpSet0; break;
        case 30: num_events = 6;  eventSet = fpSet00; break;
        case 31: num_events = 6;  eventSet = fpSet01; break;
        case  4: num_events = 12; if (hpm_threads > 2) hpm_threads = 2; eventSet = fpSet1; break;
        case 40: num_events = 6;  eventSet = fpSet10; break;
        case 41: num_events = 6;  eventSet = fpSet11; break;
        case  5: num_events = 24; if (hpm_threads > 1) hpm_threads = 1; eventSet = fxuSet; break;
        case  6: num_events = 12; if (hpm_threads > 2) hpm_threads = 2; eventSet = fxSet0; break;
        case 60: num_events = 6;  eventSet = fxSet00; break;
        case 61: num_events = 6;  eventSet = fxSet01; break;
        case  7: num_events = 12; if (hpm_threads > 2) hpm_threads = 2; eventSet = fxSet1; break;
        case 70: num_events = 6;  eventSet = fxSet10; break;
        case 71: num_events = 6;  eventSet = fxSet11; break;
        case 81: num_events = 6;  eventSet = l1pset0; break;
        case 82: num_events = 6;  eventSet = l1pset1; break;
        case 83: num_events = 6;  eventSet = l1pset2; break;
        default: break;
        }

        num_counters = num_events * hpm_threads;

        ppc_msync();
        Upci_Mode_Init(&Mode[0], UPC_DISTRIB_MODE, UPC_CM_INDEP, 0);
        initialized = 1;
        ppc_msync();
    }

    while ((initialized == 0) && (tid == 0)) { ; }

    if (tid == 0)
    {
        lock_status = 0;
        while (lock_status == 0) {
            lock_status = hpm_lock_acquire();
        }

        core = cid;

        // initialize hardware counters
        // Upci_Mode_Init(&Mode[core], UPC_DISTRIB_MODE, UPC_CM_INDEP, core);
        Upci_Punit_Init(&Punit[core], &Mode[core], core);
        // UPC_L1p_SetMode(core, L1P_CFG_UPC_SWITCH);

        // use one thread per core to enable 24 different punit counters
        // add events to count, save hwthread in one of the reserved event handle slots
        k = 0;
        for (i=0; i<num_events; i++) {   // hwthread 0
            rc = Upci_Punit_AddEvent(&Punit[core], eventSet[i], 0, &eventHandle[core][k]);
            if (rc != 0) printf("failed to add event %d\n", eventSet[i]);
            if (pid == 0) counter_index[k] = eventSet[i];
            eventHandle[core][k].rsv[0] = 0;
            k++;
        }
        if (hpm_threads > 1) {
            for (i=0; i<num_events; i++) {   // hwthread 2
                rc = Upci_Punit_AddEvent(&Punit[core], eventSet[i], 2, &eventHandle[core][k]);
                if (rc != 0) printf("failed to add event %d\n", eventSet[i]);
                if (pid == 0) counter_index[k] = eventSet[i];
                eventHandle[core][k].rsv[0] = 2;
                k++;
            }
        }
        if (hpm_threads > 2) {
            for (i=0; i<num_events; i++) {   // hwthread 1
                rc = Upci_Punit_AddEvent(&Punit[core], eventSet[i], 1, &eventHandle[core][k]);
                if (rc != 0) printf("failed to add event %d\n", eventSet[i]);
                if (pid == 0) counter_index[k] = eventSet[i];
                eventHandle[core][k].rsv[0] = 1;
                k++;
            }
        }
        if (hpm_threads > 3) {
            for (i=0; i<num_events; i++) {   // hwthread 3
                rc = Upci_Punit_AddEvent(&Punit[core], eventSet[i], 3, &eventHandle[core][k]);
                if (rc != 0) printf("failed to add event %d\n", eventSet[i]);
                if (pid == 0) counter_index[k] = eventSet[i];
                eventHandle[core][k].rsv[0] = 3;
                k++;
            }
        }

        rc = Upci_Punit_Apply(&Punit[core]);
        if (rc != 0) printf("Upci_Punit_Apply failed\n");

        Upci_Punit_Start(&Punit[core], (UPCI_CTL_RESET | UPCI_CTL_DELAY));
        // printf("Initialised upc by core = %d\n", cid);
        // Upci_Punit_Dump(2, &Punit[core]);

        lock_val = 0;
        ppc_msync();
    }

    if (pid == 0) {
        UPC_L2_EnableUPC(1, 1);
        UPC_L2_Start();
    }

    // PMPI_Barrier(local_comm);
    L2_Barrier(&id_barrier, numthreads);
    return;
}
int main(int argc, char **argv)
{
    size_t shm_length = sysconf(_SC_PAGE_SIZE);
    int is_creator = 1;

    // try to create the shared memory area
    int fd = shm_open(SHM_NAME, O_RDWR | O_CREAT | O_EXCL, S_IRWXU | S_IRWXG);
    if(fd < 0) {
        printf("creating shm area with O_CREAT did not work, %d %s\n", errno, strerror(errno));
        // we could not create the shm area -- probably it is already there
        fd = shm_open(SHM_NAME, O_RDWR, 0);
        if(fd < 0) {
            printf("error opening (existing) shared memory area, %d %s\n", errno, strerror(errno));
            if(errno == 38)
                printf("hint: Do you have /dev/shm mounted? Sorry for the inconvenience, I wanted to do it the \"proper\" way.\n");
            return -1;
        }
        is_creator = 0;   // note that we are not the creator of the shm area
    }

    // extend the file to the matching size
    if(ftruncate(fd, shm_length)) {
        printf("error in ftruncate of shared memory area, %d %s\n", errno, strerror(errno));
        return -1;
    }

    void * shm_area = mmap(0, shm_length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if(shm_area == MAP_FAILED) {
        printf("error mmapping the shared memory area, %d %s\n", errno, strerror(errno));
        close(fd);
        return -1;
    }
    close(fd);   // file descriptor not needed any more after mmap

    volatile unsigned int * counter = (unsigned int *) shm_area;
    printf("pid %d has shm area at addr %p, current value %u\n", getpid(), counter, *counter);
    if(is_creator) {
        printf("pid %d is creator of the shm area\n", getpid());
        *counter = 0;
    }
    else {
        printf("pid %d DID NOT CREATE the shm area\n", getpid());
        sleep(1);   // crude way to wait for counter initialization
    }

    int i = 0;
    while( *counter < COUNT ) {
        (*counter)++;
        i++;
        ppc_msync();
    }
    printf("pid %d contributed %d, counter at %u\n", getpid(), i, *counter);
    return 0;
}
int fw_l1p_init( void )
{
    TRACE_ENTRY(TRACE_L1P);

    uint64_t cfg_spec = L1P_CFG_SPEC_l1_hit_fwd_l2;

    uint64_t cfg_pf_usr = L1P_CFG_PF_USR_dfetch_depth(2)
        | L1P_CFG_PF_USR_dfetch_max_footprint(7)
        | L1P_CFG_PF_USR_ifetch_depth(0)
        | L1P_CFG_PF_USR_ifetch_max_footprint(2)
        | L1P_CFG_PF_USR_pf_stream_est_on_dcbt
        // [DISABLED] | L1P_CFG_PF_USR_pf_stream_optimistic
        | L1P_CFG_PF_USR_pf_stream_prefetch_enable
        | L1P_CFG_PF_USR_pf_stream_establish_enable
        | L1P_CFG_PF_USR_pf_adaptive_enable
        | L1P_CFG_PF_USR_pf_adaptive_throttle(0xF)
        ;
    /* UNUSED | L1P_CFG_PF_USR_pf_list_enable */

    uint64_t cfg_pf_sys = L1P_CFG_PF_SYS_msync_timer(7+3)
        | L1P_CFG_PF_SYS_pfhint_enable
        | L1P_CFG_PF_SYS_whint_evict_enable
        | L1P_CFG_PF_SYS_whint_cracked_enable
        | L1P_CFG_PF_SYS_lock_prefetch
        | L1P_CFG_PF_SYS_dcbfl_discard
        | L1P_CFG_PF_SYS_pf_adaptive_total_depth(24)
        | L1P_CFG_PF_SYS_pf_hit_enable
        | L1P_CFG_PF_SYS_pf_stream_l2_op_immediate
        ;
    if(!FW_DD1_WORKAROUNDS_ENABLED())
    {
        cfg_pf_sys |= L1P_CFG_PF_SYS_wrap_bug_dd2_bhv;
    }

    uint64_t cfg_wc = L1P_CFG_WC_wc_enable
        | L1P_CFG_WC_wc_suppress_if_all_be
        | L1P_CFG_WC_wc_aging
        ;

    uint64_t cfg_to = L1P_CFG_TO_to_en
        | L1P_CFG_TO_to_reload_en
        | L1P_CFG_TO_to_duration(0x3)
        ;

    uint64_t cfg_upc = L1P_CFG_UPC_ENABLE | L1P_CFG_UPC_STREAM;

    out64_sync((void *)L1P_CFG_SPEC,   cfg_spec);
    out64_sync((void *)L1P_CFG_PF_USR, cfg_pf_usr);
    out64_sync((void *)L1P_CFG_PF_SYS, L1P_CFG_PF_SYS_pf_adaptive_reset | cfg_pf_sys);
    out64_sync((void *)L1P_CFG_PF_SYS, cfg_pf_sys);
    out64_sync((void *)L1P_CFG_WC,     cfg_wc);
    out64_sync((void *)L1P_CFG_TO,     cfg_to);
    out64_sync((void *)L1P_CFG_UPC,    cfg_upc);

    /* Enable L1p hardware error interrupts */
    uint64_t esr_gea =
        // [disabled] L1P_ESR_int_list_0 |
        // [disabled] L1P_ESR_int_list_1 |
        // [disabled] L1P_ESR_int_list_2 |
        // [disabled] L1P_ESR_int_list_3 |
        // [disabled] L1P_ESR_int_list_4 |
        // [disabled] L1P_ESR_int_speculation_0 |
        // [disabled] L1P_ESR_int_speculation_1 |
        // [disabled] L1P_ESR_int_speculation_2 |
        // [disabled] L1P_ESR_int_speculation_3 |
        // [disabled] L1P_ESR_err_valid_timeout |   [see bqcbugs #1612]
        L1P_ESR_err_luq_ovfl |
        L1P_ESR_err_sr_p |
        L1P_ESR_err_sr_rd_valid_p |
        L1P_ESR_err_sw_p |
        L1P_ESR_err_si_ecc_ue |
        L1P_ESR_err_si_p |
        L1P_ESR_err_sda_p_ue |
        L1P_ESR_err_rqra_p |
        L1P_ESR_err_reload_ecc_ue_x2 |
        L1P_ESR_err_rira_p |
        L1P_ESR_err_gctr_p |
        L1P_ESR_err_lu_state_p |
        L1P_ESR_err_lu_ttype |
        // [5470] L1P_ESR_err_lu_dcr_abort |
        L1P_ESR_err_mmio_async |
        L1P_ESR_err_mmio_state_p |
        L1P_ESR_err_mmio_timeout |
        L1P_ESR_err_mmio_priv |
        L1P_ESR_err_mmio_rdata_p |
        L1P_ESR_err_mmio_wdata_p |
        L1P_ESR_err_mmio_dcrs_timeout |
        L1P_ESR_err_mmio_dcrs_priv |
        L1P_ESR_err_mmio_dcrs_par |
        L1P_ESR_err_dcrm_crit |
        L1P_ESR_err_dcrm_noncrit |
        // [5470] L1P_ESR_err_dcrm_mc |
        L1P_ESR_err_tag_timeout |
        L1P_ESR_err_hold_timeout |
        L1P_ESR_err_ditc_req_x2 |
        L1P_ESR_err_pfd_addr_p |
        L1P_ESR_err_pfd_avalid_p |
        L1P_ESR_err_pfd_fill_pnd_p |
        L1P_ESR_err_pfd_hit_pnd_p |
        L1P_ESR_err_pfd_stream_p |
        L1P_ESR_err_pfd_depth_p |
        L1P_ESR_err_pfd_clone_p |
        L1P_ESR_err_hitq_p |
        L1P_ESR_err_sd_p |
        L1P_ESR_err_pf2dfc_p |
        L1P_ESR_err_wccm_p_x2 |
        L1P_ESR_err_wccm_wcd_p_x2 |
        L1P_ESR_err_lu_wcd_p |
        L1P_ESR_err_lu_current_p |
        L1P_ESR_err_l2cmd |
        L1P_ESR_err_lu_dcr_dbus_p |
        L1P_ESR_err_luq_p |
        L1P_ESR_err_sda_phase_p |
        L1P_ESR_slice_sel_ctrl_perr |
        L1P_ESR_redun_ctrl_perr
        ;

    // +------------------------------------------------------------------------------------------+
    // | NOTE: For production environments, we mask L1P correctables during the early part of the  |
    // |       boot.  The TakeCPU hook is what allows us to unmask.                                |
    // +------------------------------------------------------------------------------------------+
    if ( ! PERS_ENABLED(PERS_ENABLE_TakeCPU) )
    {
        esr_gea |=
            L1P_ESR_err_si_ecc |
            L1P_ESR_err_reload_ecc_x2 |
            L1P_ESR_err_sda_p
            ;
    }

    out64_sync( (void *)L1P_ESR_GEA, esr_gea );

#ifndef FW_PREINSTALLED_GEA_HANDLERS
    uint64_t mask[3] = { L1P_GEA_MASK_0, L1P_GEA_MASK_1, L1P_GEA_MASK_2 };
    fw_installGeaHandler( fw_l1p_machineCheckHandler, mask );
#endif

    unsigned core = ProcessorCoreID();

    DCRWritePriv( L1P_DCR(core,INTERRUPT_STATE_A_CONTROL_HIGH),
                  L1P_DCR__INTERRUPT_STATE_A_CONTROL_HIGH__LOCAL_RING_set(1) |   // Global Interrupt
                  0 );

    DCRWritePriv( L1P_DCR(core,INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH),
                  // [5470] L1P_DCR__INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH__BAD_ADDRESS_set(1) |
                  // [5470] L1P_DCR__INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH__BAD_PRIV_set(1) |
                  L1P_DCR__INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH__BAD_DATA_PARITY_set(1) |
                  L1P_DCR__INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH__BAD_ADDRESS_PARITY_set(1) |
                  0 );

    if(A2_isDD1())
    {
        *(volatile uint64_t*)L1P_CFG_CLK_GATE = _B1(61,1);
    }
    else
    {
        *(volatile uint64_t*)L1P_CFG_CLK_GATE = L1P_CFG_CLK_GATE_clk_on_sw_req;
    }

    if(!FW_DD1_WORKAROUNDS_ENABLED())
    {
        *(volatile uint64_t*)L1P_CFG_CHICKEN |= L1P_CFG_CHICKEN_DD2;
    }

    fw_l1p_resetCEThresholds();

#if 0
    if ( ProcessorCoreID() == 2 )
    {
        // DO NOT INTEGRATE THIS CODE!!!!!!!!!!!
        uint64_t inject =
            L1P_ESR_err_reload_ecc_x2 |
            //L1P_ESR_err_si_ecc |
            //L1P_ESR_err_reload_ecc_ue_x2 |
            0
            ;
        out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), inject );
        ppc_msync();
        out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), 0 );
        ppc_msync();
    }
#endif

    TRACE_EXIT(TRACE_L1P);
    return( 0 );
}
int fw_l1p_machineCheckHandler( uint64_t mappedStatus[] )
{
    uint64_t core, pass;
    fw_uint64_t details[17*2*6];
    unsigned n = 0;
    int rc = 0;   // unless set otherwise below
    uint64_t correctableMask = FW_L1P_CORRECTABLE_MASK;

    // +--------------------------------------------------------------------+
    // | HW Issue 1596 workaround: Disable DCR Arbiter Timeouts              |
    // +--------------------------------------------------------------------+
    uint64_t dcr_arbiter_ctrl_low = DCRReadPriv(DC_ARBITER_DCR( INT_REG_CONTROL_LOW));
    DCRWritePriv( DC_ARBITER_DCR( INT_REG_CONTROL_LOW),
                  dcr_arbiter_ctrl_low & ~DC_ARBITER_DCR__INT_REG_CONTROL_LOW__NO_ACK_AFTER_REQ_set(-1) );

    for ( pass = 0; pass < 2; pass++ )
    {
        n = 0;
        uint64_t statusMask = GEA_DCR__GEA_MAPPED_INTERRUPT_STATUS0_0__L1P0_RT_INT_set(1);
        for (core = 0; core < 17; core++, statusMask >>= 1 )
        {
            // +---------------------------------------------------------------------------------------+
            // | HW Issue 1596 : Only perform a cross-core ESR access if the mapped status indicates    |
            // |                 that it is interesting to do so.  This reduces the exposure to the     |
            // |                 hardware bug documented in that issue.                                 |
            // +---------------------------------------------------------------------------------------+
            if ( ( mappedStatus[0] & statusMask ) != 0 )
            {
                uint64_t status, error;

                status = fw_l1p_readCrossCore( L1P_ESR_DCR(core) );
                status &= ( ( pass == 0 ) ? correctableMask : ~correctableMask );

                if ( status == 0 )
                    continue;

                if ( pass == 0 )
                {
                    fw_l1p_handleCorrectable( status, core );

                    // +------------------------------------------------------------------------------------------+
                    // | This funky little sequence seems to be required to clean up the A2 machine check bit      |
                    // | (bit 0) of the ESR.  It came from Krishnan and does the following:                        |
                    // |                                                                                           |
                    // |   o masks interrupts                                                                      |
                    // |   o clears the MCSR[EXT] bit                                                              |
                    // |   o clears the L1P_ESR[0] bit                                                             |
                    // |   o re-enables interrupts                                                                 |
                    // +------------------------------------------------------------------------------------------+
                    uint64_t thisCore = ProcessorCoreID();
                    uint64_t esrMask = in64( (void*)L1P_ESR_GEA_DCR(thisCore));
                    out64_sync( (void*)L1P_ESR_GEA_DCR(thisCore), 0 );
                    mtspr( SPRN_MCSR, mfspr(SPRN_MCSR) & ~MCSR_EXT );
                    out64_sync( (void*)L1P_ESR_DCR(thisCore), L1P_ESR_a2_machine_check );
                    out64_sync( (void*)L1P_ESR_GEA_DCR(thisCore), esrMask );
                }
                else
                {
                    details[n++] = L1P_ESR_DCR(core);
                    details[n++] = status;

                    status = L1P_DCR_PRIV_PTR(core)->interrupt_state_a__machine_check;
                    error  = L1P_DCR_PRIV_PTR(core)->interrupt_internal_error__machine_check;

                    if ( status != 0 )
                    {
                        details[n++] = L1P_DCR( core,INTERRUPT_STATE_A__MACHINE_CHECK);
                        details[n++] = status;
                    }
                    if ( error != 0 )
                    {
                        details[n++] = L1P_DCR(core,INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH);
                        details[n++] = error;
                    }
                    rc = -1;
                }
            }
        }
        if ( n > 0 )
        {
            fw_machineCheckRas( FW_RAS_L1P_MACHINE_CHECK, details, n, __FILE__, __LINE__ );
        }
    }

    // +--------------------------------------------------------------------------+
    // | HW Issue 1596 workaround: Force-clear any DCR timeouts and restore the    |
    // | control register.                                                         |
    // +--------------------------------------------------------------------------+
    DCRWritePriv( DC_ARBITER_DCR( INT_REG__STATE ), DC_ARBITER_DCR__INT_REG__NO_ACK_AFTER_REQ_set(1) );
    ppc_msync();
    DCRWritePriv( DC_ARBITER_DCR( INT_REG_CONTROL_LOW), dcr_arbiter_ctrl_low );

    return rc;
}
int test_main( void )
{
    int N = BgGetNumThreads();
    int irritatorThread = 1;
    int iterations = 1;

    char* irritator = fwext_getenv("IRRITATOR");
    char* testId    = fwext_getenv("TEST");
    char* itersStr  = fwext_getenv("ITERATIONS");

    if ( irritator != 0 ) {
        irritatorThread = fwext_strtoul( irritator, 0, 0 );
    }
    if ( itersStr != 0 ) {
        iterations = fwext_strtoul( itersStr, 0, 0 );
    }

    fwext_barrier( &barrier, N );

    if ( ProcessorID() == irritatorThread )
    {
        char* l2Slice = fwext_getenv("L2SLICE");
        char* mc = fwext_getenv("MC");
        int mci = 0;
        if ( mc != 0 ) {
            mci = fwext_strtoul( mc, 0, 0 );
        }

        int i;
        for ( i = 0; i < iterations; i++ )
        {
            if ( i > 0 ) {
                fwext_udelay( 250 * 1000 );
            }

            printf("Test: %s L2Slice:%s MC:%s NumThreads:%d Iter:%d\n", testId, l2Slice, mc ? mc : "?", N, i );

            if ( fwext_strcmp("BeDRAM",testId) == 0 ) {
                printf("Injecting ...\n");
                uint64_t inject = BEDRAM_DCR__BEDRAM_INTERRUPT_STATUS__BEDRAM_EDRAM_ECC_set(1);
                DCRWritePriv( BEDRAM_DCR( BEDRAM_INTERRUPT_STATUS__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("ClkStopUnit",testId) == 0 ) {
                uint64_t inject = CS_DCR__CLOCKSTOP_INTERRUPT_STATE__STOPPED_set(1);
                DCRWritePriv( CS_DCR( CLOCKSTOP_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("DrArbiter",testId) == 0 ) {
                uint64_t inject = DC_ARBITER_DCR__INT_REG__RING_NOT_CLEAN_set(1);
                DCRWritePriv( DC_ARBITER_DCR( INT_REG__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("DDR",testId) == 0 ) {
                DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_READ_BUFFER_SUE );
                uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__XSTOP_set(1);
                DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("DDR_MARKING_STORE",testId) == 0 ) {
                DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_MEMORY_ECC_MARKING_STORE_UPDATED );
                uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1);
                DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("DDR_Correctable",testId) == 0 ) {
                //DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_WRITE_BUFFER_CE);
                //DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_MEMORY_CE );
                DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_READ_BUFFER_SUE );
                //uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1);
                //DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("DDR_Threshold",testId) == 0 ) {
                DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_ECC_ERROR_COUNTER_THRESHOLD_REACHED );
                uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1);
                DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("DevBus",testId) == 0 ) {
                uint64_t inject = DEVBUS_DCR__DB_INTERRUPT_STATE__SLAVE_FIFO_PARITY_set(1);
                DCRWritePriv( DEVBUS_DCR( DB_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("EnvMon",testId) == 0 ) {
                uint64_t inject = EN_DCR__ENVMON_INTERRUPT_STATE__FSM_CHECKSUM_FAIL_set(1);
                DCRWritePriv( EN_DCR( ENVMON_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("GEA",testId) == 0 ) {
                uint64_t inject = GEA_DCR__GEA_INTERRUPT_STATE__DEVBUS_CTL_PERR_set(1);
                DCRWritePriv( GEA_DCR( GEA_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
#if 0
            {
                uint64_t inject = L1P_DCR__INTERRUPT_INTERNAL_ERROR__BAD_DATA_PARITY_set(1);
                DCRWritePriv( L1P_DCR(0, INTERRUPT_INTERNAL_ERROR__FORCE ), inject );
                ppc_msync();
            }
#endif
            if ( fwext_strcmp("L1P",testId) == 0 ) {
                //printf("Irritating L1 on core %d ...\n", irritatorThread);
#if 0
                BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(0), BIC_MACHINE_CHECK );
                BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(1), BIC_MACHINE_CHECK );
                BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(2), BIC_MACHINE_CHECK );
                BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(3), BIC_MACHINE_CHECK );
#endif
                int64_t NN;
                printf("INJECTING!\n");
                for ( NN = 0; NN < 10000000ull; NN++ )
                {
                    uint64_t inject =
                        L1P_ESR_err_reload_ecc_x2 |
                        //L1P_ESR_err_si_ecc |
                        //L1P_ESR_err_reload_ecc_ue_x2 |
                        0
                        ;
                    out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), inject );
                    ppc_msync();
                    out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), 0 );
                    ppc_msync();
                }
                printf( "*********************************** Injected %ld CEs into L1P ***********************************************\n", NN);
                //printf("L1P_ESR --> %lx\n", in64( (uint64_t*)L1P_ESR));
            }
            if ( fwext_strcmp("L1PBug",testId) == 0 ) {
                printf("Waiting for 2 seconds ...\n");
                uint64_t end = GetTimeBase() + 1600ull * 1000ull * 1000ull * 2;
                while ( GetTimeBase() < end );
                printf("Irritating L1 on core %d ...\n", irritatorThread);
                uint64_t inject =
                    L1P_ESR_err_reload_ecc_x2 |
                    //L1P_ESR_err_si_ecc |
                    //L1P_ESR_err_reload_ecc_ue_x2 |
                    0
                    ;
                printf("injecting %lx\n", inject);
                out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), inject );
                ppc_msync();
                out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), 0 );
                ppc_msync();
            }
            if ( fwext_strcmp("L2",testId) == 0 ) {
                uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__DIRB_UE_set(1);
                unsigned slice = fwext_strtoul( l2Slice, 0, 0 );
                DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("L2_Correctable",testId) == 0 ) {
                int II;
                for ( II=0; II < 111101; II++ )
                {
                    //uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__DIRB_CE_set(1);
                    uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__EDR_CE_set(1);
                    unsigned slice = fwext_strtoul( l2Slice, 0, 0 );
                    DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), inject );
                    ppc_msync();
                    DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), 0 );
                    ppc_msync();
                }
                printf("Issued %d L2 CEs!\n", II);
            }
            if ( fwext_strcmp("L2CTR",testId) == 0 ) {
                uint64_t inject = L2_COUNTER_DCR__L2_INTERRUPT_STATE__BDRY_PAR_ERR_set(1);
                unsigned counter = fwext_strtoul( l2Slice, 0, 0 );
                DCRWritePriv( L2_COUNTER_DCR( counter, L2_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("L2Central",testId) == 0 ) {
                uint64_t inject = L2_CENTRAL_DCR__L2_INTERRUPT_STATE__ECC_UE_set(1);
                DCRWritePriv( L2_CENTRAL_DCR( L2_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("Msgc",testId) == 0 ) {
                uint64_t inject = MS_GENCT_DCR__L2_INTERRUPT_STATE__TIMEOUT_E_set(1);
                DCRWritePriv( MS_GENCT_DCR( L2_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("MU",testId) == 0 ) {
                // A fatal with simple intinfo:
                uint64_t inject = MU_DCR__RME_INTERRUPTS0__RME_ERR7_set(1);
                DCRWritePriv( MU_DCR( RME_INTERRUPTS0__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("MU_Correctable",testId) == 0 ) {
                // Force a correctable error
                uint64_t inject = MU_DCR__IMU_ECC_INTERRUPTS__IMU_ECC_CE1_set(1);
                DCRWritePriv( MU_DCR( IMU_ECC_INTERRUPTS__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("ND",testId) == 0 ) {
                // A fatal with simple intinfo:
                uint64_t inject = ND_RESE_DCR__RESE_INTERRUPTS__LOCAL_RING_set(1);
                DCRWritePriv( ND_RESE_DCR( 7, RESE_INTERRUPTS__FORCE ), inject );
                ppc_msync();
                //fwext_getFwInterface()->deprecated.backdoorTest(0);
            }
            if ( fwext_strcmp("PCIe",testId) == 0 ) {
                ppc_msync();
                uint64_t inject = PE_DCR__PCIE_INTERRUPT_STATE__CFG_PERR_set(1);
                DCRWritePriv( PE_DCR( PCIE_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("SerDes0",testId) == 0 ) {
                uint64_t inject = SERDES_LEFT_DCR__SERDES_INTERRUPT_STATE__A_PLLA_LOCK_LOST_set(1);
                DCRWritePriv( SERDES_LEFT_DCR( SERDES_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("SerDes1",testId) == 0 ) {
#if 0
                // simulation of issue 1811
                DCRWritePriv( SERDES_RIGHT_DCR(SERDES_INTERRUPT_STATE_CONTROL_HIGH),
                              SERDES_RIGHT_DCR__SERDES_INTERRUPT_STATE_CONTROL_HIGH__D_PLLA_LOCK_LOST_set(2) );
                ppc_msync();
#endif
                uint64_t inject = SERDES_RIGHT_DCR__SERDES_INTERRUPT_STATE__D_PLLA_LOCK_LOST_set(1);
                printf("inject->%016lX\n", inject);
                DCRWritePriv( SERDES_RIGHT_DCR( SERDES_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
            if ( fwext_strcmp("TestInt",testId) == 0 ) {
                uint64_t inject = TESTINT_DCR__TI_INTERRUPT_STATE__INT_PARITY_ERROR_set(1);
                DCRWritePriv( TESTINT_DCR( TI_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
#ifdef TESTUPC
#define UPC_C_MMIO ((upc_c_dcr_t*)(PHYMAP_MINADDR_UPC | PHYMAP_PRIVILEGEDOFFSET))
            {
                // An error was detected on the Processor/L2 UPC Daisy Chain
                UPC_C_MMIO->upc_c_interrupt_state__force = UPC_C_DCR__UPC_C_INTERRUPT_STATE__PRING_ERROR_set(1);
                ppc_msync();
            }
#endif
#ifdef TESTWakeup
            {
                uint64_t inject = _DCR__CLOCKSTOP_INTERRUPT_STATE__STOPPED_set(1);
                DCRWritePriv( _DCR( CLOCKSTOP_INTERRUPT_STATE__FORCE ), inject );
                ppc_msync();
            }
#endif
        }
    }
    else
    {
        if ( fwext_strcmp("L1PBug",testId) == 0 ) {
            if ( ( ProcessorID() % 4 ) == 0 ) {
                uint64_t end = GetTimeBase() + 1600ull * 1000ull * 1000ull * 10;
                while ( GetTimeBase() < end )
                {
                    uint64_t esr = in64( (uint64_t*)L1P_ESR );
                    out64_sync((void *)L1P_ESR, ~esr );
                }
                //printf( "shutting down ...\n");
            }
        }
    }

    fwext_barrier( &barrier, N );
    fwext_barrier( &barrier, N );
    exit(0);
}
// Deliver an interprocessor interrupt - utility function
//   Parm1: target thread index. Valid values 0 through 67
//   Parm2: function pointer to be called
//   Parm3: parameter supplied to the called function
IPIHANDLER_Fcn_t IPI_DeliverInterrupt(int processorID, IPIHANDLER_Fcn_t handler, uint64_t parm1, uint64_t parm2 )
{
    Kernel_WriteFlightLog(FLIGHTLOG, FL_DELIVRIPI, processorID, (uint64_t)handler, parm1, parm2);
    //printf("Sending IPI to threadid %d\n", thread_index);

    // Validate the target
    if ((processorID >= 0) && (processorID < CONFIG_MAX_CORES*CONFIG_HWTHREADS_PER_CORE))
    {
        // Get my hardware thread state object
        HWThreadState_t *pHwt = GetHWThreadStateByProcessorID(ProcessorID());

        // Determine if there is a previous request that has not completed
        if (pHwt->ipi_message[processorID].fcn)
        {
            // If the target hwthread is the same as our hardware thread, do not deadlock.
            // Instead return the function pointer of the IPI that has not been handled.
            // If the caller is sending an IPI to itself, it must be prepared to handle this situation.
            if (processorID == ProcessorID())
                return (pHwt->ipi_message[processorID].fcn);

            // Lower our hwthread priority and spin waiting for the target thread to process a previous request
            ThreadPriority_Low();
            while(pHwt->ipi_message[processorID].fcn)
            {
                // Is the target hardware thread beyond the point of accepting interrupts?
                if (pHwt->appExitPhase == AppExit_Phase2)
                {
                    // Just return. The target thread is essentially gone.
                    // We should not be here if an IPI is being delivered from/to application or agent threads
                    // of a job since in those conditions, the application processes and agents would not have
                    // exited phase 1 of AppExit. We should only be here if the Tool control thread is attempting
                    // to send an IPI to a thread that has since entered phase 2 of AppExit.
                    ThreadPriority_Medium();
                    return NULL;
                }
                // Is a process_exit message pending against this hwthread, and is the kthread attempting this IPI delivery
                // in the same process as the target of the exit operation? Also, are we trying to send an IPI to the same
                // hardware thread that has sent the IPI exit message to us? If all of these are true, we need to get out of
                // the way since the sender of the process_exit IPI will be waiting in a barrier for all of the threads of
                // its process to arrive. Just abort this IPI since at this point there is nothing more important than
                // allowing the exit to proceed.
                int i;
                for (i=0; i<64; i++)
                {
                    IPI_Message_t* pIPImsg = (IPI_Message_t*)&(NodeState.CoreState[i/4].HWThreads[i%4].ipi_message[ProcessorID()]);
                    if ((pIPImsg->fcn == IPI_handler_process_exit) &&                           // Is there a process exit message pending to this hardware thread?
                        (GetProcessByProcessorID(ProcessorID()) == GetMyKThread()->pAppProc) && // Are we running in a kthread that is part of this hardware thread's process?
                        (i == processorID))                                                     // Are we trying to send an IPI to the same hardware thread that has sent the IPI exit message to us?
                    {
                        // This is a situation that will not clear on its own. Toss the delivery of this IPI and allow the exit to proceed.
                        ThreadPriority_Medium();
                        return NULL;
                    }
                }
                // Is there an IPI pending directed to us which requires an ACK by the sender? If so, we must process the
                // pending request so that the target can proceed to an interruptable point and accept the previously
                // pending request, thereby allowing us to make our request pending.
                IPI_DeadlockAvoidance(processorID);
            }
            ThreadPriority_Medium();
        }
        pHwt->ipi_message[processorID].fcn = handler;
        pHwt->ipi_message[processorID].param1 = parm1;
        pHwt->ipi_message[processorID].param2 = parm2;
        ppc_msync();

        // Write to the c2c send register
        BIC_REGISTER send_value = 0;
        send_value |= ((processorID & 0x3)+1);             // Set the thread index value (values 1 through 4)
        send_value |= ((BIC_C2C_INTTYPE_EXTERNAL) << 3);   // Indicate delivery as an External interrupt
        send_value |= (_BN(processorID>>2)) >> 42;         // Set the core mask
        BIC_WriteInterruptSend(send_value);
    }
    return NULL;   // nothing pending to report (assumed fall-through result)
}
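/*
 * Usage sketch (illustrative, not from the original source): delivering an
 * IPI and handling the self-target case, where a non-NULL return is the
 * handler of an IPI we sent ourselves that has not yet been processed.
 * my_handler and poke_thread are hypothetical.
 */
#if 0
void my_handler(uint64_t parm1, uint64_t parm2);

void poke_thread(int targetProcessorID)
{
    IPIHANDLER_Fcn_t pending = IPI_DeliverInterrupt(targetProcessorID, my_handler, 0, 0);
    if (pending != NULL) {
        // We targeted our own hardware thread while an earlier IPI was still
        // undelivered; the caller must drain 'pending' before retrying.
    }
}
#endif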
int fw_sync_timebase( void )
{
    uint64_t numloops = 10;
    uint64_t value;
    uint64_t rc;
    Personality_t *pers = &FW_Personality;
    uint64_t numthreads;
    uint64_t msr;
    uint64_t geamap8 = 0;

    if(!PERS_ENABLED(PERS_ENABLE_MU)) return 0;
    if(!PERS_ENABLED(PERS_ENABLE_ND)) return 0;

    msr = mfmsr();
    mtmsr(msr & ~(MSR_EE | MSR_CE | MSR_ME));
    isync();

    numthreads = popcnt64(DCRReadPriv(TESTINT_DCR(THREAD_ACTIVE0))) +
                 popcnt64(DCRReadPriv(TESTINT_DCR(THREAD_ACTIVE1)));

    if(PhysicalThreadID() == 0)
    {
#define WU_MMIO_PRIV_BASE ((volatile unsigned long *)0x3ffe8001c00)
#define SET_THREAD(i) ((0x300 + (i)*0x40) / sizeof (unsigned long))
        WU_MMIO_PRIV_BASE[SET_THREAD(0)] = WU_DCR__THREAD0_WU_EVENT_SET__GEA_WU_EN_set(0x8);

        if(ProcessorID() == 0)
        {
            // Setup classroute 14.  Identical to classroute 15.
            value = DCRReadPriv(ND_500_DCR(CTRL_GI_CLASS_14_15));
            ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS14_UP_PORT_I_insert(value, ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS15_UP_PORT_I_get(value));
            ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS14_UP_PORT_O_insert(value, ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS15_UP_PORT_O_get(value));
            DCRWritePriv(ND_500_DCR(CTRL_GI_CLASS_14_15), value);
            ppc_msync();

            // Initialize GI pulse
            MUSPI_GIInit (&GI, 14, 0);

            // Initialize the GI barrier interrupt on classroute 14
            DCRWritePriv(MU_DCR(BARRIER_INT_EN), MU_DCR__BARRIER_INT_EN__CLASS14_set(4));

            // Route MU MAP4 interrupt to GEA lane 12 (wakeup unit bit 0)
            geamap8 = DCRReadPriv(GEA_DCR(GEA_INTERRUPT_MAP8));
            DCRWritePriv(GEA_DCR(GEA_INTERRUPT_MAP8), GEA_DCR__GEA_INTERRUPT_MAP8__MU_MAP4_set(12));

            rc = MUSPI_GIBarrierInit(&GIBarrier, 15);
        }

        // do local barrier
        BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
        while(BeDRAM_Read(BeDRAM_LOCKNUM_TIMESYNC_BARRIER) != numthreads)
        {
        }

        if(ProcessorID() == 0)
        {
            // Perform a barrier across all nodes.
            MUSPI_GIBarrierEnterAndWait(&GIBarrier);
            if ( rc != 0 )
            {
                FW_Warning("MUSPI_GIBarrierInit for class route 15 returned rc = %ld.", rc);
                return -1;
            }
            // Start gsync counter (for debug)
            DCRWritePriv(TESTINT_DCR(GSYNC_CTR), -1);
        }

        doTimeSync(numloops);
        mtspr(SPRN_TENS, 0xf);
    }
    else if((ProcessorID() == 1) && (pers->Network_Config.PrimordialClassRoute.GlobIntUpPortOutputs == 0))
    {
        BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
        createSendGIPulseThread(numloops);
    }
    else
    {
        BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
        mtspr(SPRN_TENC, 1 << ProcessorThreadID());
        isync();
    }

    // Wait for all hwthreads on the node
    BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
    while(BeDRAM_Read(BeDRAM_LOCKNUM_TIMESYNC_BARRIER) != numthreads * 2)
    {
    }

    if(ProcessorID() == 0)
    {
        // Tear down classroute 14.
        value = DCRReadPriv(ND_500_DCR(CTRL_GI_CLASS_14_15));
        ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS14_UP_PORT_I_insert(value, 0);
        ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS14_UP_PORT_O_insert(value, 0);
        DCRWritePriv(ND_500_DCR(CTRL_GI_CLASS_14_15), value);
        ppc_msync();

        // Disable the barrier interrupt and restore the saved GEA map.
        DCRWritePriv(MU_DCR(BARRIER_INT_EN), MU_DCR__BARRIER_INT_EN__CLASS14_set(0));
        DCRWritePriv(GEA_DCR(GEA_INTERRUPT_MAP8), geamap8);
    }
    WU_MMIO_PRIV_BASE[SET_THREAD(0)] = WU_DCR__THREAD0_WU_EVENT_SET__GEA_WU_EN_set(0);

    BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
    while(BeDRAM_Read(BeDRAM_LOCKNUM_TIMESYNC_BARRIER) != numthreads * 3)
    {
    }

    mtmsr(msr);
    isync();
    return 0;
}
// Note: they will/must wake in FIFO order.
uint64_t Futex_Wait( uint32_t op_and_flags, KThread_t *pKThr, Futex_t* futex_vaddr, uint32_t futex_val, uint64_t timeout, uint64_t current_time )
{
    int thd_index = ProcessorID();
    KThread_t *FtQ;
    int isShared = Futex_IsShared(futex_vaddr, op_and_flags);

    // Enter Short-Term Critical Section and Grab the Atomic Operations Control Lock
    //printf("[%d] before-crit: futex@%x = %d\n", core, futex_vaddr, *((int*)futex_vaddr));
    uint64_t my_turn = Lock_AtomicAcquire(isShared);
    //printf("[%d] after-crit: futex@%x = %d\n", core, futex_vaddr, *((int*)futex_vaddr));
    //_FutexRecord( _FUTHIST_WAIT, futex_vaddr, futex_val, *((uint32_t*)futex_vaddr) );

    if ( *((uint32_t*)futex_vaddr) != futex_val )
    {
        TRACE( TRACE_Futex, ("(I) Futex_Wait[%d]: EWOULDBLOCK address 0x%016lx futex_val %016lx, *futex_vaddr %d\n",
                             thd_index, (uint64_t)futex_vaddr, (uint64_t)futex_val, *((uint32_t*)futex_vaddr) ));
        Lock_AtomicRelease(isShared, my_turn);
        return CNK_RC_FAILURE(EWOULDBLOCK);
    }

    TRACE( TRACE_Futex, ("(I) Futex_Wait[%d]: WAITING address 0x%016lx (kthread=0x%p) time=0x%016lx\n",
                         thd_index, (uint64_t)futex_vaddr, pKThr, current_time));

    Futex_State_t* futexTableEntry = Futex_findTableEntry(futex_vaddr, isShared, 1);
    if (futexTableEntry)
    {
        // Append this kthread to the end of the waiter queue.
        if (futexTableEntry->pKThr_Waiter_Next)
        {
            for (FtQ = futexTableEntry->pKThr_Waiter_Next; FtQ->FutexQueueNext; FtQ = FtQ->FutexQueueNext)
            {
            }
            FtQ->FutexQueueNext = pKThr;
        }
        else
        {
            futexTableEntry->pKThr_Waiter_Next = pKThr;
        }
        pKThr->FutexQueueNext = (KThread_t *)0;
        pKThr->FutexVAddr = futex_vaddr;
        pKThr->FutexValue = *futex_vaddr;
        pKThr->FutexTimeout = timeout;
        pKThr->FutexIsShared = isShared;
        ppc_msync();

        // If this is a timed futex, set up the udecr timer.
        if (timeout)
        {
            if (timeout < current_time)
            {
                timeout = current_time+1;
            }
            Timer_enableFutexTimeout(current_time, timeout);
        }
    }
    else
    {
        TRACE( TRACE_Futex, ("(I) Futex_Wait[%d]: futexTableEntry == NULL\n", thd_index));
    }

    Sched_Block(GetMyKThread(), SCHED_STATE_FUTEX );
    Lock_AtomicRelease(isShared, my_turn);
    // TRACE( TRACE_Futex, ("(<) %s[%d]\n", __func__, core));

    // The thread's non-volatile state has not been saved yet.  Set a Pending
    // bit, which will result in a full state save and a call to Scheduler().
    pKThr->Pending |= KTHR_PENDING_YIELD;
    return CNK_RC_SUCCESS(0);
}
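/*
 * Usage sketch (illustrative, not from the original source): the Linux-style
 * user-space wait loop that Futex_Wait() serves.  Because the kernel
 * re-checks *futex_vaddr against futex_val under the atomic-operations lock,
 * a wake that lands between the user-space test and the syscall is not lost;
 * the call simply returns EWOULDBLOCK.
 */
#if 0
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static volatile uint32_t flag = 0;

void wait_for_flag(void)
{
    while (flag == 0) {
        syscall(SYS_futex, &flag, FUTEX_WAIT, 0, NULL, NULL, 0);
    }
}
#endif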
static int Futex_WakeQueue(Futex_State_t* futexTableEntry, int maxToWake, Futex_State_t* secondaryFutexTableEntry, uint16_t *unblockList)
{
    int numberAwoken = 0;
    int thd_index = ProcessorID();

    TRACE( TRACE_Futex, ("(I) %s[%d]: table:%p (vaddr:%p) num2wake:%d secondary:%p\n", __func__, thd_index,
                         futexTableEntry, (futexTableEntry ? futexTableEntry->futex_vaddr : 0LL), maxToWake, secondaryFutexTableEntry));

    if (!futexTableEntry)
    {
        TRACE( TRACE_Futex, ("(I) %s[%d]: futexTableEntry == NULL\n", __func__, thd_index));
        return numberAwoken;
    }

    KThread_t* thread;
    for (thread = futexTableEntry->pKThr_Waiter_Next; thread; thread = futexTableEntry->pKThr_Waiter_Next)
    {
        // TRACE( TRACE_Futex, ("(D) %s[%d]: process thread:%08x (next->%08x\n", __func__, core, (unsigned)thread, (unsigned)(thread->FutexQueueNext)));
        // If we haven't hit the limit, wake up the thread:
        if ( numberAwoken < maxToWake )
        {
            // Advance the futex table to the next entry now because we will be destroying the link
            // in the current entry:
            futexTableEntry->pKThr_Waiter_Next = thread->FutexQueueNext;
            TRACE( TRACE_Futex, ("(I) %s[%d]: waking kthread:%p processorid:%d for futex:%p\n", __func__, thd_index,
                                 thread, thread->ProcessorID, thread->FutexVAddr));
            thread->Reg_State.gpr[3] = 0;   // this is the result of the futex syscall
            thread->FutexQueueNext = (KThread_t *)0;
            thread->FutexVAddr = NULL;
            thread->FutexValue = 0;
            thread->FutexTimeout = 0;
            thread->FutexIsShared = 0;
            //thread->pad3 = 1;  // TEMP PROBLEM ANALYSIS
            unblockList[numberAwoken] = GetTID(thread);   // save the kthread in an abbreviated 2-byte format.
            numberAwoken++;
            ppc_msync();
        }
        else if ( secondaryFutexTableEntry )
        {
            // We are requeueing ... move the entire FIFO to the secondary
            // futex queue.  Since the FIFO is a linked list, we can do
            // this by simply snipping off the remaining chain and pasting
            // it onto the secondary:
            KThread_t** tail;
            for (tail = &(secondaryFutexTableEntry->pKThr_Waiter_Next); *tail; tail = &((*tail)->FutexQueueNext))
                ;                                     // find the end of the secondary queue
            futexTableEntry->pKThr_Waiter_Next = 0;   // The old queue is now empty
            *tail = thread;                           // Paste the queue remainder onto the end of the existing queue

            // Now update the entries
            for ( ; thread; thread = thread->FutexQueueNext )
            {
                thread->FutexVAddr = secondaryFutexTableEntry->futex_vaddr;
                thread->FutexValue = *thread->FutexVAddr;
                ppc_msync();
            }
            TRACE( TRACE_Futex, ("(I) %s[%d]: some waiters requeued to futex:%016lx (sys:%p) \n", __func__, thd_index,
                                 (uint64_t)secondaryFutexTableEntry->futex_vaddr, secondaryFutexTableEntry ));
        }
        else
        {
            break;
        }
    }

    // If the waiter list is now empty, then remove the entry from the table:
    if ( futexTableEntry->pKThr_Waiter_Next == NULL )
    {
        futexTableEntry->futex_vaddr = 0;
        ppc_msync();
    }
    if ( secondaryFutexTableEntry && (secondaryFutexTableEntry->pKThr_Waiter_Next == NULL) )
    {
        secondaryFutexTableEntry->futex_vaddr = 0;
        ppc_msync();
    }
    // TRACE( TRACE_Futex, ("(<) %s[%d]\n", __func__, core));
    return numberAwoken;
}
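/*
 * Usage sketch (illustrative, not from the original source): a
 * FUTEX_CMP_REQUEUE-style operation built on Futex_WakeQueue().  Waking one
 * waiter and requeueing the remainder onto a second futex avoids a
 * thundering herd on a contended lock.  MAX_WAKEUPS, cond_vaddr, mutex_vaddr
 * and isShared are hypothetical.
 */
#if 0
uint16_t unblockList[MAX_WAKEUPS];
Futex_State_t* src = Futex_findTableEntry(cond_vaddr,  isShared, 0);   // must already exist
Futex_State_t* dst = Futex_findTableEntry(mutex_vaddr, isShared, 1);   // create if needed
int woken = Futex_WakeQueue(src, 1 /* wake one */, dst /* requeue the rest */, unblockList);
#endif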