Example #1
int l1p_init()
{
    // Restore A2 hardware thread priority
    ThreadPriority_Medium();
    
    if(PhysicalThreadID() != 0)
        return 0;
    
    // Restore prefetcher state
    CoreState_t* cs = GetCoreStateByCore(PhysicalProcessorID());
    if(cs->default_l1p_init)
    {
        out64((void *)L1P_CFG_SPEC,   cs->default_l1p_cfgspec);
        out64((void *)L1P_CFG_PF_USR, cs->default_l1p_cfgpfusr);
        out64((void *)L1P_CFG_PF_SYS, cs->default_l1p_cfgpfsys);
        ppc_msync();
    }
    else
    {
        ppc_msync();
        cs->default_l1p_cfgspec  = in64((void*)L1P_CFG_SPEC);
        cs->default_l1p_cfgpfusr = in64((void*)L1P_CFG_PF_USR);
        cs->default_l1p_cfgpfsys = in64((void*)L1P_CFG_PF_SYS);
        cs->default_l1p_init     = 1;
        ppc_msync();
    }
    return 0;
}
Example #2
short LeaderLatch2(UPC_Barrier_t *pLock, volatile uint64_t *pstatus, volatile uint64_t *pstatus2)
{
    uint64_t curLock;


    // check if any other thread has the lock yet.
    curLock = LoadReserved( pLock );
    while (curLock == 0) {
        // quit if status is inactive
        if ((*pstatus == 0) && (*pstatus2 == 0)) {
            return LLATCH_CONTINUE;
        }

        // otherwise store this thread's index + 1 as the lock value.
        curLock = Kernel_PhysicalHWThreadIndex();
        curLock += 1;
        if ( StoreConditional( pLock, curLock )) {
            ppc_msync();  // export mem sync
            return LLATCH_LEADER;
        }

        // try again.
        curLock = LoadReserved( pLock );
    }

    // otherwise block on curLock
    uint64_t savpri = Set_ThreadPriority_Low();
    while ( *pLock ) {
        asm volatile ("nop; nop; nop; nop;");
    }
    Restore_ThreadPriority(savpri);
    ppc_msync();  // export mem sync for everyone else
    return LLATCH_CONTINUE;
}
Example #3
static inline void createSendGIPulseThread(int numloops)
{
    int loop;
    for(loop=0; loop<numloops; loop++)
    {
        mtspr(SPRN_TENC, 1 << ProcessorThreadID());
        isync();
        MUSPI_GISend(&GI);
        ppc_msync();
        MUSPI_GISendClear(&GI);
        ppc_msync();
    }
    mtspr(SPRN_TENC, 1 << ProcessorThreadID());
    isync();
}
Example #4
void UPC_Lock(UPC_Lock_t *pLock)
{
    uint64_t lockIndex = Upci_GetLockID();
    uint32_t curValue;

    //mbar();

    do {
        do {
            curValue = LoadReserved32( pLock );
            if (curValue == 0 ) {
                break;
            }
            else if (curValue == lockIndex) {
                UPC_FATAL_ERR("Duplicate UPC_Lock() by thread lockIndex=%ld\n", lockIndex);
                // Terminate(lockIndex);       // terminate test
            }
            else {
                uint64_t savpri = Set_ThreadPriority_Low();
                while ( *pLock ) { // spin till free
                    asm volatile ("nop; nop; nop; nop;");
                }
                Restore_ThreadPriority(savpri);
            }
        } while(1);
    } while (!StoreConditional32(pLock, lockIndex));
    // isync();   // create import barrier (lock seen before subsequent storage accesses)
    ppc_msync();

}
Example #5
/*!
 * \brief Initializes speculation registers before the start of a job.
 */
int Speculation_Init()
{
    int slice;
    uint64_t scrub_rate;
    L2C_SPECID_t specid;
    
    // Skip this setup on DD1 hardware or when DD1 workarounds are enabled.
    if(!TI_isDD1() && ((GetPersonality()->Kernel_Config.NodeConfig & PERS_ENABLE_DD1_Workarounds) == 0))
    {
        SPEC_SetNumberOfDomains(1);
        SPEC_SetPrivMap(
            L2C_PRIVMAP_DISABLEWRITEFNC(L2C_PRIVMAP_FUNC_NUMDOM)  | 
            L2C_PRIVMAP_DISABLEWRITEFNC(L2C_PRIVMAP_FUNC_PRIVMAP) 
            );
        ppc_msync();

        for(specid=0; specid<128; specid++)
        {
            SPEC_TryChangeState_priv(specid, L2C_IDSTATE_PRED_SPEC | L2C_IDSTATE_INVAL);  
            SPEC_SetConflict_priv(specid, 0);
        }
        ppc_msync();
    }
    App_GetEnvValue("BG_SIMPLEROLLBACK", &SIMPLE_ROLLBACK);
    L2_AtomicStore(&SpecDomainsAllocated, 0);
    domainsConfigured = 0;
    
    // Reset the L2 scrub rate
    scrub_rate = 64;
    
    for(slice=0; slice<L2_DCR_num; slice++)
    {
        // Set the L2 scrub rate
        uint64_t l2_dcr_refctrl = DCRReadPriv(L2_DCR(slice, REFCTRL));
        if(default_l2_first_init)
            default_l2_scrub_rate[slice] = L2_DCR__REFCTRL__SCB_INTERVAL_get(l2_dcr_refctrl);
        L2_DCR__REFCTRL__SCB_INTERVAL_insert(l2_dcr_refctrl, default_l2_scrub_rate[slice]);
        DCRWritePriv(L2_DCR(slice, REFCTRL), l2_dcr_refctrl);
    }
    default_l2_first_init = 1;
    
    Speculation_ExitJailMode();
    return 0;
}
Example #6
void LeaderUnLatch(UPC_Barrier_t *pLock)
{
    ppc_msync();  // import mem sync

    // atomically clear the lock, releasing any threads spinning on it.
    do {
        LoadReserved( pLock );
    } while (!StoreConditional( pLock, 0 ) );
}
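
A minimal usage sketch for the leader-latch pair (Examples #2 and #6): exactly one caller wins the latch and returns LLATCH_LEADER, performs the one-time work, and then releases the spinning followers with LeaderUnLatch(). The zero-initialized latch and the two status words are illustrative assumptions, not part of the original source.

static UPC_Barrier_t latch;             // assumed: 0 means "no leader yet"
static volatile uint64_t status1 = 1;   // hypothetical activity flags checked by LeaderLatch2()
static volatile uint64_t status2 = 1;

void do_once_per_group(void)
{
    if (LeaderLatch2(&latch, &status1, &status2) == LLATCH_LEADER) {
        // ... one-time initialization, published by the ppc_msync in LeaderUnLatch ...
        LeaderUnLatch(&latch);          // followers spin inside LeaderLatch2 until this
    }
    // LLATCH_CONTINUE: a follower (leader already ran) or both status words were zero
}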
Example #7
// Assumes target number of threads is consistent by all threads, so no one thread needs to be the master.
short UPC_Barrier(UPC_Barrier_t *pLock, short num_threads, uint64_t timeout)
{
    uint64_t targ_num_threads = num_threads;
    uint64_t barr_timeout;  // absolute deadline in timebase cycles
    if (timeout == 0) timeout = 200000;
    barr_timeout = GetTimeBase() + timeout;

    ppc_msync();  // export mem sync

    // store initial value
    uint64_t curValue;
    curValue = LoadReserved( pLock );
    while (curValue == 0) {
        curValue = targ_num_threads;
        StoreConditional(pLock, curValue);  // let 1st writer win - so don't care if fails.
        curValue = LoadReserved( pLock );
    }

    // now atomically subtract 1 for this thread.
    do {
        curValue = LoadReserved( pLock );
        curValue--;
    } while (!StoreConditional( pLock, curValue));

    // now wait till value reaches zero
    uint64_t savpri = Set_ThreadPriority_Low();
    while (*pLock > 0) {
        if (GetTimeBase() > barr_timeout) {
            UPC_CRITICAL_WARNING("Timeout(2) on barr_target of 0x%016lx; cur target=0x%016lx.\n", targ_num_threads, curValue);
            break;       // end barrier
        }
    }
    Restore_ThreadPriority(savpri);

    ppc_msync();  // import mem sync

    return (*pLock);
}
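
A usage sketch for the barrier in Example #7, assuming the UPC_Barrier_t word starts at zero and four hardware threads participate; the variable and function names are illustrative.

static UPC_Barrier_t phaseBarrier;      // assumed zero-initialized

void worker_step(void)
{
    // ... phase N work ...
    UPC_Barrier(&phaseBarrier, 4, 0);   // all 4 threads call this; 0 selects the default timeout
    // ... phase N+1 work: every thread's phase-N stores are now visible ...
}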
Example #8
void UPC_Unlock(UPC_Lock_t *pLock)
{
    ppc_msync();  // import sync - prior store must complete before lock dropped.

    uint32_t lockIndex = (uint32_t)Upci_GetLockID();
    uint32_t curValue = *pLock;
    if (curValue != lockIndex) {
        UPC_FATAL_ERR("Improper UPC_Unlock for lockIndex=%d; curIndex=%d\n", lockIndex, curValue);
        // Terminate(lockIndex);       // terminate test
    }
    else {
        *pLock = 0;
    }
   // mbar();
}
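
Examples #4 and #8 pair up to bracket a critical section: the ppc_msync after acquisition orders the protected accesses behind the lock, and the one before release makes the protected stores visible before the lock is dropped. A hypothetical caller, assuming a zero-initialized UPC_Lock_t:

static UPC_Lock_t counterLock;   // assumed: 0 means unlocked
static uint64_t sharedCounter;

void bump_counter(void)
{
    UPC_Lock(&counterLock);      // spins until acquired
    sharedCounter++;             // protected update
    UPC_Unlock(&counterLock);    // prior store completes before the lock is released
}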
Example #9
void Futex_Interrupt( KThread_t *pKThr )
{

    int isShared = pKThr->FutexIsShared; // create a local copy of the shared indicator 
    uint64_t my_turn = Lock_AtomicAcquire(isShared);

    Futex_State_t* futexTableEntry = Futex_findTableEntry(pKThr->FutexVAddr, isShared, 0);

    if (futexTableEntry != NULL)
    {
        if (futexTableEntry->pKThr_Waiter_Next == pKThr)
        {
            futexTableEntry->pKThr_Waiter_Next = pKThr->FutexQueueNext;
            // If there are no waiters, free the entry in the futex table.
            if (futexTableEntry->pKThr_Waiter_Next == NULL)
            {
                futexTableEntry->futex_vaddr = 0;
            }
        } 
        else
        {
            KThread_t *FtQ = futexTableEntry->pKThr_Waiter_Next;
            while ((FtQ != NULL) && (FtQ->FutexQueueNext != pKThr))
            {
                FtQ = FtQ->FutexQueueNext;
            }
            if (FtQ != NULL)
            {
                FtQ->FutexQueueNext = pKThr->FutexQueueNext;
            }
        }
    }
    pKThr->Reg_State.cr     |= CR_ERROR;  // syscall failed
    pKThr->Reg_State.gpr[3]  = EINTR; // this is the result of the futex syscall
    pKThr->FutexQueueNext    = (KThread_t *)0;
    pKThr->FutexVAddr        = NULL;
    pKThr->FutexValue        = 0;
    pKThr->FutexTimeout      = 0;
    pKThr->FutexIsShared     = 0;
    //pKThr->pad3 = 2; // TEMP PROBLEM ANALYSIS
    ppc_msync();

    Lock_AtomicRelease(isShared, my_turn);

    Sched_Unblock(pKThr, SCHED_STATE_FUTEX);

}
Example #10
int Speculation_CleanupJob()
{
    SPEC_SetSpeculationIDSelf_priv(0x400);   // clear interrupt state, clear lower 9 bits as well

    // Restore the default system-call and standard-interrupt code sequences.
    // See Speculation_EnableFastSpeculationPath() for commentary on this
    // process.  In this case we need an IPI only for the "system" core,
    // because this routine is called on every application hardware thread.
    uint64_t ici_needed = 0;
    Kernel_Lock(&FastPathsLock);
    if (FastPathsEnabled)
    {
	extern uint32_t Vector_EI_trampoline;
	extern uint32_t Vector_SC_trampoline;

	uint64_t exceptionVector = mfspr(SPRN_IVPR);
	*((uint32_t *) (exceptionVector + IVO_EI)) = Vector_EI_trampoline;
	*((uint32_t *) (exceptionVector + IVO_SC)) = Vector_SC_trampoline;
	ppc_msync();   // make sure the stores have taken effect
	FastPathsEnabled = 0;
	ici_needed = 1;  // we can't hold the lock while sending IPI's
    }
    Kernel_Unlock(&FastPathsLock);

    // Flush the icache whether or not we're the thread that did the patching.
    // We only need to do this from one thread on each core.
    if (ProcessorThreadID() == 0)
    {
	isync();
	ici();
    }

    if (ici_needed)
    {
	// We still need an IPI for the "system" core.
	IPI_invalidate_icache(NodeState.NumCoresEnabled - 1);
	Kernel_WriteFlightLog(FLIGHTLOG_high, FL_SPCFEPDIS, 0,0,0,0);
    }
    
    // bqcbugs 1620
    l2_set_overlock_threshold(0);
    l2_set_spec_threshold(0);
    l2_set_prefetch_enables(1);
    // --
    
    return 0;
}
Example #11
int Futex_Timeout(KThread_t* thread)
{
    KThread_t *FtQ;

    Futex_State_t* futexTableEntry = Futex_findTableEntry(thread->FutexVAddr, thread->FutexIsShared, 0);
    if (futexTableEntry != NULL)
    {
        if (futexTableEntry->pKThr_Waiter_Next == thread)
        {
            futexTableEntry->pKThr_Waiter_Next = thread->FutexQueueNext;
            // If we have no waiters on this futex address, we must remove it from the table.
            if (futexTableEntry->pKThr_Waiter_Next == 0)
            {
                futexTableEntry->futex_vaddr = 0;
            }
        }
        else
        {
	    FtQ = futexTableEntry->pKThr_Waiter_Next;
	    while ((FtQ != NULL) && (FtQ->FutexQueueNext != thread))
	    {
		FtQ = FtQ->FutexQueueNext;
	    }
	    if (FtQ != NULL)
	    {
		FtQ->FutexQueueNext = thread->FutexQueueNext;
	    }
        }
    }
    thread->Reg_State.cr     |= CR_ERROR;  // syscall failed
    thread->Reg_State.gpr[3] = ETIMEDOUT; // this is the result of the futex syscall
    thread->FutexQueueNext    = (KThread_t *)0;
    thread->FutexVAddr        = NULL;
    thread->FutexValue        = 0;
    thread->FutexTimeout      = 0;
    thread->FutexIsShared     = 0;
    //thread->pad3 = 3; // TEMP PROBLEM ANALYSIS
    ppc_msync();

    Sched_Unblock( thread, SCHED_STATE_FUTEX );

    return 0;
}
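
Examples #9 and #11 duplicate the same queue-removal logic: each Futex_State_t heads a singly linked FIFO of waiting KThread_t records chained through FutexQueueNext, and the table entry is freed (futex_vaddr = 0) only when its queue empties. A condensed sketch of that shared step, using the field names from the examples (the helper itself is hypothetical, and a real caller would hold the atomic-operations lock as in Example #9):

static void futex_queue_remove(Futex_State_t* entry, KThread_t* victim)
{
    if (entry->pKThr_Waiter_Next == victim)
    {
        // victim is the head: unlink it and free the table entry if now empty
        entry->pKThr_Waiter_Next = victim->FutexQueueNext;
        if (entry->pKThr_Waiter_Next == NULL)
            entry->futex_vaddr = 0;
    }
    else
    {
        // walk the FIFO to the node in front of victim, then unlink victim
        KThread_t* prev = entry->pKThr_Waiter_Next;
        while ((prev != NULL) && (prev->FutexQueueNext != victim))
            prev = prev->FutexQueueNext;
        if (prev != NULL)
            prev->FutexQueueNext = victim->FutexQueueNext;
    }
}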
Example #12
/*!
 * \brief Allocates a speculative domain. 
 * \note Each additional domain potentially decreases the number of speculative IDs assigned to each domain.
 * \note Domains must have all speculative IDs set to the available state
 */
int Speculation_AllocateDomain(unsigned int* domain)
{
#if 0
    const unsigned char domainmap[17] = { 1, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16};
    if(NodeState.NumSpecDomains >= 16)
    {
        return ENOMEM;
    }
    if(!SPEC_AllAvailOrInvalid())
    {
        // if any ID is speculative or committed, we cannot switch
        // \todo this needs to be made safe. 
        // While switching, we cannot allow threads to allocate concurrently (race)
        // TM domains need to be made aware of changes to adapt their allocation mask
        // In short, all speculation RTS need to be shut down temporarily while changing the number of domains
        return ENOMEM;
    }
    NodeState.NumSpecDomains++;
    SPEC_SetNumberOfDomains( domainmap[NodeState.NumSpecDomains] );
    SPEC_SetDomainMode_priv(NodeState.NumSpecDomains-1, L2C_DOMAINATTR_MODE_STM);
    ppc_msync();
    
    /* \todo Initialize commit, alloc, reclaim pointers??? */
    
    *domain = NodeState.NumSpecDomains-1;
#endif
    uint32_t domainAllocated = L2_AtomicLoadIncrement(&SpecDomainsAllocated);
    if(domainAllocated >= SPEC_GetNumberOfDomains())
    {
        return ENOMEM;
    }
    
    // bqcbugs 1620.
    l2_set_prefetch_enables(0);
    l2_unlock_all_with_address((void *) 0x200000);
    l2_set_overlock_threshold(0xA);                      // set L2 overlock and spec thresholds
    l2_set_spec_threshold(0xA);
    // --
    
    Kernel_WriteFlightLog(FLIGHTLOG_high, FL_SPCALCDOM, domainAllocated,0,0,0);
    *domain = domainAllocated;
    return 0;
}
Example #13
int Speculation_EnterJailMode(bool longRunningSpec)
{
    AppProcess_t* process = GetMyProcess();
    if (process != GetProcessByProcessorID(ProcessorID()))
    {
        Speculation_Restart(SPEC_GetSpeculationIDSelf_priv(), Kernel_SpecReturnCode_INVALID, &GetMyKThread()->Reg_State);
        return Kernel_SpecReturnCode_INVALID;
    }
    if(longRunningSpec)
    {
        uint64_t SpecPID;
        uint32_t ProcessOvercommit = 64 / GetMyAppState()->Active_Processes;
        if(ProcessOvercommit > 4) ProcessOvercommit = 4;
        vmm_getSpecPID(process->Tcoord, ProcessorThreadID() % ProcessOvercommit, &SpecPID);
        
        if(SpecPID)
        {
            mtspr(SPRN_PID, SpecPID);
            isync();
            
            // The A2 does not reliably notify the L1P of a DCI
#if 0
            volatile uint64_t* pf_sys_p=(volatile uint64_t*)(SPEC_GetL1PBase_priv()+L1P_CFG_PF_SYS-L1P_ESR);
            uint64_t pf_sys=*pf_sys_p;
            *pf_sys_p=pf_sys | L1P_CFG_PF_SYS_pf_invalidate_all;
            *pf_sys_p=pf_sys & ~L1P_CFG_PF_SYS_pf_invalidate_all;
            dci();
#else
            asm volatile ("dci 2");
#endif
            ppc_msync();
        }
        else
        {
            Speculation_Restart(SPEC_GetSpeculationIDSelf_priv(), Kernel_SpecReturnCode_INVALID, &GetMyKThread()->Reg_State);
            return Kernel_SpecReturnCode_INVALID;
        }
    }
Example #14
int UPC_Lock_woBlock(UPC_Lock_t *pLock)
{
    uint64_t lockIndex = Upci_GetLockID();
    uint32_t curValue;
    int rc = -1;
    //ppc_msync();

    curValue = LoadReserved32( pLock );
    while ((curValue == 0) && (!StoreConditional32(pLock, lockIndex))) {
        curValue = LoadReserved32( pLock );
    }

    if (curValue == 0) {
        // got the lock
        rc = 0;
        ppc_msync();  // import mem sync - subsequent loads only occur after lock successful.
    }
    else if (curValue == lockIndex) {
        UPC_FATAL_ERR("Duplicate UPC_Lock() by thread lockIndex=%ld\n", lockIndex);
        // Terminate(lockIndex);       // terminate test
    }

    return rc;
}
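
A sketch of the try-lock pattern Example #14 enables: probe once and fall back to other work instead of spinning. The wrapper is hypothetical; UPC_Unlock is the routine from Example #8.

void try_update(UPC_Lock_t *pLock)
{
    if (UPC_Lock_woBlock(pLock) == 0) {
        // ... critical section ...
        UPC_Unlock(pLock);
    }
    else {
        // lock busy; defer the update rather than block
    }
}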
Example #15
/*=================================================================*/
void HPM_Init_t(int numthreads)
{
  int i, j, k, core;
  // int threads_per_core;
  //  int * eventSet;
  char * ptr;
  unsigned int tid, pid, cid;
  unsigned int lock_status;

  int rc;
//  Upci_Mode_t Mode;

  tid = PhysicalThreadID();    // between 0 and 3
  pid = PhysicalThreadIndex(); // between 0 and 67
  cid = pid/4;

  if (pid == 0)
    {
      // set the initial cumulative counter values to zero 
      for (k=0; k<MAX_CORES; k++)
	for (j=0; j<MAX_CODE_BLOCKS; j++)
	  for (i=0; i<MAX_COUNTERS; i++)
	    counter_sum[k][j][i] = 0LL;
	
      for (j=0; j<MAX_CODE_BLOCKS; j++) timebase_sum[j] = 0LL;
	
      for (j=0; j<MAX_CODE_BLOCKS; j++) 
	for (i=0; i<6; i++)
	  L2_sum[j][i] = 0LL;
	
      // keep track of code block starts and stops 
      for (j=0; j<MAX_CODE_BLOCKS; j++) {
	block_starts[j] = 0;
	block_stops[j]  = 0;
      }
	
      // set mask used for thread and core aggregation
      for (i=0; i<MAX_EVENTS; i++) mask[i] = 0;
	
      // check env variables
      // fixme
      ptr = fwext_getenv("HPM_GROUP");
      if (ptr == NULL)  {
        hpm_group = 0;
      }
      else hpm_group = hpm_atoi(ptr);
      // printf("hpm_group = %d\n", hpm_group);
      // hpm_group = 82;
      if (hpm_group < -1) hpm_group = 0;
      if (hpm_group > 99) hpm_group = 0;
	
      // fixme
      // ptr = fwext_getenv("HPM_SCOPE");   if (pid !=0) return;
      
      // if (ptr != NULL) {
      //          if (strncasecmp(ptr,"process", 7) == 0) process_scope = 1;
      //          if (strncasecmp(ptr,"node", 4) == 0)    node_scope = 1;
      // }
	
      // fixme
      // ptr = fwext_getenv("HPM_METRICS");
      // if (ptr != NULL) {
      //          if (strncasecmp(ptr,"yes", 3) == 0) derived_metrics = 1;
      // }
	
      for (i=0; i<MAX_CORES; i++) coremask[i] = 1;
	
      // find the number of cores used by this process
      // fixme
      // numcores = 0;
      // for (i=0; i<MAX_CORES; i++) numcores += coremask[i];
      numcores = 17;
	
      // determine the number of threads per core
      // numthreads = BgGetNumThreads();
      // numthreads = 68;
      // threads_per_core = numthreads / numcores;
	
      // hpm_threads = threads_per_core;
      // fixme
      hpm_threads = 4;
      	
      // optionally reset the number of threads per core that will be counted
      // fixme
      // ptr = fwext_getenv("HPM_THREADS");
      // if (ptr != NULL) {
      //         hpm_threads = fwext_atoi(ptr);
      // if (hpm_threads < 1) hpm_threads = 1;
      // if (hpm_threads > 4) hpm_threads = 4;
      // }
	
      // set num_events and num_counters based on hpm_group and hpm_threads
      switch (hpm_group) {
      case -1:
	num_events = 6;
	eventSet = exptSet;
	break;
	
      case 0:
	num_events = 6;
	eventSet = mySet;
	break;
	
      case 1:
	num_events = 12;
	if (hpm_threads > 2) hpm_threads = 2;
	eventSet = ldSet;
	break;
	
      case 2:
	num_events = 24;
	if (hpm_threads > 1) hpm_threads = 1;
	eventSet = fpuSet;
	break;
	
      case 3:
	num_events = 12;
	if (hpm_threads > 2) hpm_threads = 2;
	eventSet = fpSet0;
	break;
	
      case 30:
	num_events = 6;
	eventSet = fpSet00;
	break;
	
      case 31:
	num_events = 6;
	eventSet = fpSet01;
	break;
	
      case 4:
	num_events = 12;
	if (hpm_threads > 2) hpm_threads = 2;
	eventSet = fpSet1;
	break;
	
      case 40:
	num_events = 6;
	eventSet = fpSet10;
	break;
	
      case 41:
	num_events = 6;
	eventSet = fpSet11;
	break;
	
      case 5:
	num_events = 24;
	if (hpm_threads > 1) hpm_threads = 1;
	eventSet = fxuSet;
	break;
	
      case 6:
	num_events = 12;
	if (hpm_threads > 2) hpm_threads = 2;
	eventSet = fxSet0;
	break;
	
      case 60:
	num_events = 6;
	eventSet = fxSet00;
	break;
	
      case 61:
	num_events = 6;
	eventSet = fxSet01;
	break;
	
      case 7:
	num_events = 12;
	if (hpm_threads > 2) hpm_threads = 2;
	eventSet = fxSet1;
	break;
	
      case 70:
	num_events = 6;
	eventSet = fxSet10;
	break;
	
      case 71:
	num_events = 6;
	eventSet = fxSet11;
	break;

      case 81:
	num_events = 6;
	eventSet = l1pset0;
	break;

      case 82:
	num_events = 6;
	eventSet = l1pset1;
	break;

      case 83:
	num_events = 6;
	eventSet = l1pset2;
	break;
	
      default:
	break;
	
      }
	
      num_counters =  num_events * hpm_threads;
      ppc_msync();
      Upci_Mode_Init(&Mode[0], UPC_DISTRIB_MODE, UPC_CM_INDEP, 0);
      initialized = 1;
      ppc_msync();
    }
      
  
  while ((initialized == 0) && (tid == 0))
    {
      ;	
    }

  if (tid == 0) {

    lock_status = 0;
    while (lock_status == 0)
      {
	lock_status = hpm_lock_acquire();
      }
       
    core = cid;

    // initialize hardware counters
    // Upci_Mode_Init(&Mode[core], UPC_DISTRIB_MODE, UPC_CM_INDEP, core);
    Upci_Punit_Init(&Punit[core], &Mode[core], core);

    // UPC_L1p_SetMode(core, L1P_CFG_UPC_SWITCH);
       
    // use one thread per core to enable 24 different punit counters
       
    // add events to count, save hwthread in one of the reserved event handle slots
    k = 0;
    for (i=0; i<num_events; i++) {                         // hwthread 0
      rc = Upci_Punit_AddEvent(&Punit[core], eventSet[i],  0, &eventHandle[core][k]);
      if (rc != 0) printf("failed to add event %d\n", eventSet[i]);
      if (pid == 0)
	counter_index[k] = eventSet[i];
      eventHandle[core][k].rsv[0] = 0;
      k++;
    }
    if (hpm_threads > 1) {
      for (i=0; i<num_events; i++) {                         // hwthread 2
	rc = Upci_Punit_AddEvent(&Punit[core], eventSet[i],  2, &eventHandle[core][k]);
	if (rc != 0) printf("failed to add event %d\n", eventSet[i]);
	if (pid == 0)
	  counter_index[k] = eventSet[i];
	eventHandle[core][k].rsv[0] = 2;
	k++;
      }
    }
    if (hpm_threads > 2) {
      for (i=0; i<num_events; i++) {                         // hwthread 1
	rc = Upci_Punit_AddEvent(&Punit[core], eventSet[i],  1, &eventHandle[core][k]);
	if (rc != 0) printf("failed to add event %d\n", eventSet[i]);
	if (pid == 0)
	  counter_index[k] = eventSet[i];
	eventHandle[core][k].rsv[0] = 1;
	k++;
      }
    }
    if (hpm_threads > 3) {
      for (i=0; i<num_events; i++) {                         // hwthread 3
	rc = Upci_Punit_AddEvent(&Punit[core], eventSet[i],  3, &eventHandle[core][k]);
	if (rc != 0) printf("failed to add event %d\n", eventSet[i]);
	if (pid == 0)
	  counter_index[k] = eventSet[i];
        eventHandle[core][k].rsv[0] = 3;
	k++;
      }
    }
    

    rc = Upci_Punit_Apply(&Punit[core]);
    if (rc != 0) printf("Upci_Punit_Apply failed\n");
    
    Upci_Punit_Start(&Punit[core], (UPCI_CTL_RESET | UPCI_CTL_DELAY));
    // printf("Initialised upc by core = %d\n", cid);
    // Upci_Punit_Dump(2, &Punit[core]);

    lock_val = 0;
    ppc_msync();
    
  }

  if (pid == 0)
    {
      UPC_L2_EnableUPC(1, 1);  
      UPC_L2_Start();
    }

  // PMPI_Barrier(local_comm);

  L2_Barrier(&id_barrier, numthreads);
     
  return;
}
Example #16
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
// Assumed defined elsewhere in this codebase: SHM_NAME, COUNT, and ppc_msync().

int main(int argc, char **argv) {

	size_t shm_length = sysconf(_SC_PAGE_SIZE);

	int is_creator = 1;
	// try to create the shared memory area
	int fd = shm_open(SHM_NAME, O_RDWR | O_CREAT | O_EXCL, S_IRWXU | S_IRWXG);

	if(fd<0) {
		printf("creating shm area with O_CREAT did not work, %d %s\n", errno, strerror(errno));

		// we could not create the shm area -- probably it is already there 
		fd = shm_open(SHM_NAME, O_RDWR, 0);

		if(fd<0) {
			printf("error opening (existing) shared memory area, %d %s\n", errno, strerror(errno));
			if(errno == ENOSYS)  /* errno 38 */
				printf("hint: Do you have /dev/shm mounted? Sorry for the inconvenience, I wanted to do it the \"proper\" way.\n");
			return -1;
		}

		is_creator = 0; // note that we are not the creator of the shm area
	}

	// extend file to matching size
	if(ftruncate(fd, shm_length)) {
		printf("error in ftruncate of shared memory area, %d %s\n", errno, strerror(errno));
		return -1;
	}

	void * shm_area = mmap(0, shm_length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if(shm_area == MAP_FAILED) {
		printf("error mmapping the shared memory area, %d %s\n", errno, strerror(errno));
		close(fd);
		return -1;
	}

	close(fd); // file descriptor not needed any more after mmap

	volatile unsigned int * counter = (unsigned int *) shm_area;

	printf("pid %d has shm area at addr %p, current value %u\n", getpid(), counter, *counter);

	if(is_creator) {
		printf("pid %d is creator of the shm area\n", getpid());
		*counter = 0;
	} else {
		printf("pid %d DID NOT CREATE the shm area\n", getpid());
		sleep(1); // crude way to wait for counter initialization
	}


	int i = 0;

	while( *counter < COUNT ) {
		(*counter)++;
		i++;
		ppc_msync();
	}

	printf("pid %d contributed %d, counter at %u\n", getpid(), i, *counter);

	return 0;
}
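
Example #16 never unmaps or unlinks the segment, so the counter persists in /dev/shm across runs. If teardown were wanted, a possible cleanup sketch before the final return would be (standard POSIX calls; placement is an assumption):

	munmap(shm_area, shm_length);   // drop this process's mapping
	if (is_creator)
		shm_unlink(SHM_NAME);   // remove the segment name once the creator is done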
Example #17
int fw_l1p_init( void )  {

  TRACE_ENTRY(TRACE_L1P);

  uint64_t cfg_spec   = L1P_CFG_SPEC_l1_hit_fwd_l2;

  uint64_t cfg_pf_usr = L1P_CFG_PF_USR_dfetch_depth(2)
                      | L1P_CFG_PF_USR_dfetch_max_footprint(7)   
                      | L1P_CFG_PF_USR_ifetch_depth(0)       
                      | L1P_CFG_PF_USR_ifetch_max_footprint(2)   
                      | L1P_CFG_PF_USR_pf_stream_est_on_dcbt 
      // [DISABLED] | L1P_CFG_PF_USR_pf_stream_optimistic  
                      | L1P_CFG_PF_USR_pf_stream_prefetch_enable
                      | L1P_CFG_PF_USR_pf_stream_establish_enable
                      | L1P_CFG_PF_USR_pf_adaptive_enable    
                      | L1P_CFG_PF_USR_pf_adaptive_throttle(0xF) ;
    /* UNUSED         | L1P_CFG_PF_USR_pf_list_enable        */
  
  uint64_t cfg_pf_sys = 
      L1P_CFG_PF_SYS_msync_timer(7+3)            
      |  L1P_CFG_PF_SYS_pfhint_enable             
      |  L1P_CFG_PF_SYS_whint_evict_enable        
      |  L1P_CFG_PF_SYS_whint_cracked_enable      
      |  L1P_CFG_PF_SYS_lock_prefetch             
      |  L1P_CFG_PF_SYS_dcbfl_discard             
      |  L1P_CFG_PF_SYS_pf_adaptive_total_depth(24)
      |  L1P_CFG_PF_SYS_pf_hit_enable              
      |  L1P_CFG_PF_SYS_pf_stream_l2_op_immediate  ;
  
  if(!FW_DD1_WORKAROUNDS_ENABLED())
  {
      cfg_pf_sys |= L1P_CFG_PF_SYS_wrap_bug_dd2_bhv;
  }
  
  uint64_t cfg_wc    =  L1P_CFG_WC_wc_enable
      | L1P_CFG_WC_wc_suppress_if_all_be 
      | L1P_CFG_WC_wc_aging ;

  uint64_t cfg_to    =  L1P_CFG_TO_to_en 
      | L1P_CFG_TO_to_reload_en
      | L1P_CFG_TO_to_duration(0x3) ;

  uint64_t cfg_upc   =  L1P_CFG_UPC_ENABLE
      | L1P_CFG_UPC_STREAM;

  out64_sync((void *)L1P_CFG_SPEC,cfg_spec);
  
  out64_sync((void *)L1P_CFG_PF_USR,cfg_pf_usr);
  
  out64_sync((void *)L1P_CFG_PF_SYS,L1P_CFG_PF_SYS_pf_adaptive_reset|cfg_pf_sys);
  out64_sync((void *)L1P_CFG_PF_SYS,cfg_pf_sys);
  
  out64_sync((void *)L1P_CFG_WC,cfg_wc);
  
  out64_sync((void *)L1P_CFG_TO,cfg_to);

  out64_sync((void *)L1P_CFG_UPC,cfg_upc);

  /* Enable L1p hardware error interrupts */

  uint64_t esr_gea =
      // [disabled] L1P_ESR_int_list_0 |
      // [disabled] L1P_ESR_int_list_1 |
      // [disabled] L1P_ESR_int_list_2 |
      // [disabled] L1P_ESR_int_list_3 |
      // [disabled] L1P_ESR_int_list_4	|
      // [disabled] L1P_ESR_int_speculation_0 |
      // [disabled] L1P_ESR_int_speculation_1 |
      // [disabled] L1P_ESR_int_speculation_2 |
      // [disabled] L1P_ESR_int_speculation_3 |
      // [disabled] L1P_ESR_err_valid_timeout | [see bqcbugs #1612]
      L1P_ESR_err_luq_ovfl |
      L1P_ESR_err_sr_p |
      L1P_ESR_err_sr_rd_valid_p |
      L1P_ESR_err_sw_p |
      L1P_ESR_err_si_ecc_ue |
      L1P_ESR_err_si_p |
      L1P_ESR_err_sda_p_ue |
      L1P_ESR_err_rqra_p |
      L1P_ESR_err_reload_ecc_ue_x2 |
      L1P_ESR_err_rira_p |
      L1P_ESR_err_gctr_p	|
      L1P_ESR_err_lu_state_p |
      L1P_ESR_err_lu_ttype |
      // [5470] L1P_ESR_err_lu_dcr_abort |
      L1P_ESR_err_mmio_async |
      L1P_ESR_err_mmio_state_p |
      L1P_ESR_err_mmio_timeout |
      L1P_ESR_err_mmio_priv |
      L1P_ESR_err_mmio_rdata_p |
      L1P_ESR_err_mmio_wdata_p |
      L1P_ESR_err_mmio_dcrs_timeout |
      L1P_ESR_err_mmio_dcrs_priv |
      L1P_ESR_err_mmio_dcrs_par |
      L1P_ESR_err_dcrm_crit |
      L1P_ESR_err_dcrm_noncrit |
      // [5470] L1P_ESR_err_dcrm_mc |
      L1P_ESR_err_tag_timeout |
      L1P_ESR_err_hold_timeout |
      L1P_ESR_err_ditc_req_x2 |
      L1P_ESR_err_pfd_addr_p |
      L1P_ESR_err_pfd_avalid_p |
      L1P_ESR_err_pfd_fill_pnd_p |
      L1P_ESR_err_pfd_hit_pnd_p |
      L1P_ESR_err_pfd_stream_p |
      L1P_ESR_err_pfd_depth_p |
      L1P_ESR_err_pfd_clone_p |
      L1P_ESR_err_hitq_p |
      L1P_ESR_err_sd_p |
      L1P_ESR_err_pf2dfc_p |
      L1P_ESR_err_wccm_p_x2 |
      L1P_ESR_err_wccm_wcd_p_x2 |
      L1P_ESR_err_lu_wcd_p |
      L1P_ESR_err_lu_current_p |
      L1P_ESR_err_l2cmd |
      L1P_ESR_err_lu_dcr_dbus_p |
      L1P_ESR_err_luq_p |
      L1P_ESR_err_sda_phase_p |
      L1P_ESR_slice_sel_ctrl_perr |
      L1P_ESR_redun_ctrl_perr
      ;

  // +------------------------------------------------------------------------------------------+
  // | NOTE: For production environments, we mask L1P correctables during the early part of the |
  // |       boot.  The TakeCPU hook is what allows us to unmask.                               |
  // +------------------------------------------------------------------------------------------+

  if ( ! PERS_ENABLED(PERS_ENABLE_TakeCPU) ) { 
      esr_gea |=
	  L1P_ESR_err_si_ecc |
	  L1P_ESR_err_reload_ecc_x2 |
	  L1P_ESR_err_sda_p
	  ;

  }

  out64_sync(
      (void *)L1P_ESR_GEA, 
      esr_gea
      );


#ifndef FW_PREINSTALLED_GEA_HANDLERS 

  uint64_t mask[3] = { L1P_GEA_MASK_0, L1P_GEA_MASK_1, L1P_GEA_MASK2  };

  fw_installGeaHandler( fw_l1p_machineCheckHandler, mask );

#endif


  unsigned core = ProcessorCoreID();

  DCRWritePriv( L1P_DCR(core,INTERRUPT_STATE_A_CONTROL_HIGH), 
		L1P_DCR__INTERRUPT_STATE_A_CONTROL_HIGH__LOCAL_RING_set(1)  | // Global Interrupt
		0 );

  DCRWritePriv( L1P_DCR(core,INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH), 
		// [5470] L1P_DCR__INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH__BAD_ADDRESS_set(1)        | 
		// [5470] L1P_DCR__INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH__BAD_PRIV_set(1)           | 
		L1P_DCR__INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH__BAD_DATA_PARITY_set(1)    | 
		L1P_DCR__INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH__BAD_ADDRESS_PARITY_set(1) | 
		0 );

  if(A2_isDD1())
  {
      *(volatile uint64_t*)L1P_CFG_CLK_GATE = _B1(61,1);
  }
  else
  {
      *(volatile uint64_t*)L1P_CFG_CLK_GATE = L1P_CFG_CLK_GATE_clk_on_sw_req;
  }

  if(!FW_DD1_WORKAROUNDS_ENABLED()) 
  {
      *(volatile uint64_t*)L1P_CFG_CHICKEN |= L1P_CFG_CHICKEN_DD2;
  }
  

  fw_l1p_resetCEThresholds();


#if 0

  if ( ProcessorCoreID() == 2 ) {
  // DO NOT INTEGRATE THIS CODE!!!!!!!!!!!
  uint64_t inject = 
      L1P_ESR_err_reload_ecc_x2 |
      //L1P_ESR_err_si_ecc |
      //L1P_ESR_err_reload_ecc_ue_x2 |
      0  ;
	
  out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), inject );
  ppc_msync();
  out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), 0 );
  ppc_msync();
  }

#endif


  TRACE_EXIT(TRACE_L1P);

  return( 0 );
}
Example #18
int fw_l1p_machineCheckHandler( uint64_t mappedStatus[] ) {

    uint64_t core, pass;
    fw_uint64_t details[17*2*6];
    unsigned n = 0;
    int rc = 0; // unless set otherwise below
    uint64_t correctableMask = FW_L1P_CORRECTABLE_MASK;

    // +--------------------------------------------------------------------+
    // | HW Issue 1596 workaround:  Disable DCR Arbiter Timeouts            |
    // +--------------------------------------------------------------------+

    uint64_t dcr_arbiter_ctrl_low = DCRReadPriv(DC_ARBITER_DCR( INT_REG_CONTROL_LOW));
    DCRWritePriv( DC_ARBITER_DCR( INT_REG_CONTROL_LOW), dcr_arbiter_ctrl_low & ~DC_ARBITER_DCR__INT_REG_CONTROL_LOW__NO_ACK_AFTER_REQ_set(-1) );

    for ( pass = 0; pass < 2; pass++ ) {

	n = 0;

	uint64_t statusMask = GEA_DCR__GEA_MAPPED_INTERRUPT_STATUS0_0__L1P0_RT_INT_set(1);

	for (core = 0; core < 17; core++, statusMask >>= 1 ) {

	    // +---------------------------------------------------------------------------------------+
	    // |  HW Issue 1596 : Only perform a cross-core ESR access if the mapped status indicates  |
	    // |                  that it is interesting to do so.  This reduces the exposure to the   |
	    // |                  hardware bug documented in that issue.                               |
	    // +---------------------------------------------------------------------------------------+

	    if ( ( mappedStatus[0] & statusMask ) != 0 ) {

		uint64_t status, error;

		status = fw_l1p_readCrossCore( L1P_ESR_DCR(core) );

		status &= ( ( pass == 0 ) ?  correctableMask :  ~correctableMask );

		if ( status == 0 )
		    continue;

		if ( pass == 0 ) {

		    fw_l1p_handleCorrectable( status, core );

		    // +------------------------------------------------------------------------------------------+
		    // | This funky little sequence seems to be required to clean up the A2 machine check bit     |
		    // | (bit 0) of the ESR.  It came from Krishnan  and does the following:                      |
		    // |                                                                                          |
		    // |     o   masks interrupts                                                                 |
		    // |     o   clears the MCSR[EXT] bit                                                         |
		    // |     o   clears the L1P_ESR[0] bit                                                        |
		    // |     o   re-enables interrupts                                                            |
		    // |                                                                                          |
		    // +------------------------------------------------------------------------------------------+
		
		    uint64_t thisCore = ProcessorCoreID();
		    uint64_t esrMask = in64( (void*)L1P_ESR_GEA_DCR(thisCore));
		    out64_sync( (void*)L1P_ESR_GEA_DCR(thisCore), 0 );
		    mtspr( SPRN_MCSR, mfspr(SPRN_MCSR) & ~MCSR_EXT );
		    out64_sync( (void*)L1P_ESR_DCR(thisCore), L1P_ESR_a2_machine_check );
		    out64_sync( (void*)L1P_ESR_GEA_DCR(thisCore), esrMask );
		}
		else {

		    details[n++] = L1P_ESR_DCR(core);
		    details[n++] = status;

		    status = L1P_DCR_PRIV_PTR(core)->interrupt_state_a__machine_check;
		    error  = L1P_DCR_PRIV_PTR(core)->interrupt_internal_error__machine_check;

		    if ( status != 0 ) {
			details[n++] = L1P_DCR( core,INTERRUPT_STATE_A__MACHINE_CHECK);
			details[n++] = status;
		    }
	    
		    if ( error != 0 ) {
			details[n++] = L1P_DCR(core,INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH);
			details[n++] = error;
		    }

		    rc = -1;
		}
	    }
 	}

	if ( n > 0 ) {
	    fw_machineCheckRas( FW_RAS_L1P_MACHINE_CHECK, details, n, __FILE__, __LINE__ );
	}
    }

    // +--------------------------------------------------------------------------+
    // | HW Issue 1596 workaround:  Force-clear any DCR timeouts and restore the  |
    // |   control register.                                                      |
    // +--------------------------------------------------------------------------+

    DCRWritePriv( DC_ARBITER_DCR( INT_REG__STATE ), DC_ARBITER_DCR__INT_REG__NO_ACK_AFTER_REQ_set(1) );
    ppc_msync();
    DCRWritePriv( DC_ARBITER_DCR( INT_REG_CONTROL_LOW), dcr_arbiter_ctrl_low );
	
    return rc;
}
Example #19
int test_main( void ) {

  int N = BgGetNumThreads();
  int irritatorThread = 1;
  int iterations = 1;

  char* irritator  = fwext_getenv("IRRITATOR");
  char* testId     = fwext_getenv("TEST");
  char* itersStr   = fwext_getenv("ITERATIONS");

  if ( irritator != 0 ) {
      irritatorThread = fwext_strtoul( irritator, 0, 0 );
  }

  if ( itersStr != 0 ) {
      iterations = fwext_strtoul( itersStr, 0, 0 );
  }

  fwext_barrier( &barrier, N );
  
  if ( ProcessorID() == irritatorThread ) {

      char* l2Slice = fwext_getenv("L2SLICE");
      char* mc      = fwext_getenv("MC");
      int   mci = 0;

      if ( mc != 0 ) {
	  mci = fwext_strtoul( mc, 0, 0 );
      }
   
      int i;

      for ( i = 0; i < iterations; i++ ) {

	  if ( i > 0 ) {
	      fwext_udelay( 250 * 1000 );
	  }

	  printf("Test: %s L2Slice:%s MC:%s NumThreads:%d Iter:%d\n", testId, l2Slice, mc ? mc : "?", N, i );
      
	  if ( fwext_strcmp("BeDRAM",testId) == 0 )  {
	      printf("Injecting ...\n");
	      uint64_t inject = BEDRAM_DCR__BEDRAM_INTERRUPT_STATUS__BEDRAM_EDRAM_ECC_set(1);
	      DCRWritePriv( BEDRAM_DCR( BEDRAM_INTERRUPT_STATUS__FORCE ), inject );
	      ppc_msync();
	  }


	  if ( fwext_strcmp("ClkStopUnit",testId) == 0 )  {
	      uint64_t inject = CS_DCR__CLOCKSTOP_INTERRUPT_STATE__STOPPED_set(1);
	      DCRWritePriv( CS_DCR( CLOCKSTOP_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("DrArbiter",testId) == 0 )  {
	      uint64_t inject = DC_ARBITER_DCR__INT_REG__RING_NOT_CLEAN_set(1);
	      DCRWritePriv( DC_ARBITER_DCR( INT_REG__FORCE ), inject );
	      ppc_msync();
	  }


	  if ( fwext_strcmp("DDR",testId) == 0 )  {

	      DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_READ_BUFFER_SUE );
	      uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__XSTOP_set(1);
	      DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("DDR_MARKING_STORE",testId) == 0 )  {
	      DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_MEMORY_ECC_MARKING_STORE_UPDATED );
	      uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1);
	      DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("DDR_Correctable",testId) == 0 )  {
	      //DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_WRITE_BUFFER_CE);
	      //DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_MEMORY_CE );
	      DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_READ_BUFFER_SUE );
	      //uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1);
	      //DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("DDR_Threshold",testId) == 0 )  {
	      DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_ECC_ERROR_COUNTER_THRESHOLD_REACHED );
	      uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1);
	      DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("DevBus",testId) == 0 )  {
	      uint64_t inject = DEVBUS_DCR__DB_INTERRUPT_STATE__SLAVE_FIFO_PARITY_set(1);
	      DCRWritePriv( DEVBUS_DCR( DB_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("EnvMon",testId) == 0 )  {
	      uint64_t inject = EN_DCR__ENVMON_INTERRUPT_STATE__FSM_CHECKSUM_FAIL_set(1);
	      DCRWritePriv( EN_DCR( ENVMON_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("GEA",testId) == 0 )  {
	      uint64_t inject = GEA_DCR__GEA_INTERRUPT_STATE__DEVBUS_CTL_PERR_set(1);
	      DCRWritePriv( GEA_DCR( GEA_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }


#if 0
	  {
	      uint64_t inject = L1P_DCR__INTERRUPT_INTERNAL_ERROR__BAD_DATA_PARITY_set(1);
	      DCRWritePriv( L1P_DCR(0, INTERRUPT_INTERNAL_ERROR__FORCE ), inject );
	      ppc_msync();
	  }
#endif

	  if ( fwext_strcmp("L1P",testId) == 0 )  {
	  
	      //printf("Irritating L1 on core %d ...\n", irritatorThread);
#if 0
	      BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(0), BIC_MACHINE_CHECK );
	      BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(1), BIC_MACHINE_CHECK );
	      BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(2), BIC_MACHINE_CHECK );
	      BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(3), BIC_MACHINE_CHECK );
#endif

	      int64_t NN;

	      printf("INJECTING!\n");
	      for ( NN = 0; NN < 10000000ull; NN++ ) {
		  uint64_t inject = 
		      L1P_ESR_err_reload_ecc_x2 |
		      //L1P_ESR_err_si_ecc |
		      //L1P_ESR_err_reload_ecc_ue_x2 |
		      0  ;

		  out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), inject );
		  ppc_msync();
		  out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), 0 );
		  ppc_msync();
	      }

	      printf( "*********************************** Injected %ld CEs into L1P ***********************************************\n", NN);

	      //printf("L1P_ESR --> %lx\n", in64( (uint64_t*)L1P_ESR));
	  }

	  if ( fwext_strcmp("L1PBug",testId) == 0 )  {
	  

	      printf("Waiting for 2 seconds ...\n");

	      uint64_t end = GetTimeBase() + 1600ull * 1000ull * 1000ull * 2;

	      while ( GetTimeBase() < end );

	      printf("Irritating L1 on core %d ...\n", irritatorThread);

	      uint64_t inject = 
		  L1P_ESR_err_reload_ecc_x2 |
		  //L1P_ESR_err_si_ecc |
		  //L1P_ESR_err_reload_ecc_ue_x2 |
		  0  ;
	
	      printf("injecting %lx\n", inject);
	      out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), inject );
	      ppc_msync();
	      out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), 0 );
	      ppc_msync();
	  }


	  if ( fwext_strcmp("L2",testId) == 0 )  {
	      uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__DIRB_UE_set(1);
	      unsigned slice = fwext_strtoul( l2Slice, 0, 0 );
	      DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("L2_Correctable",testId) == 0 )  {

	      int II;

	      for ( II=0; II < 111101; II++ ) {
		  //uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__DIRB_CE_set(1);
		  uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__EDR_CE_set(1);
		  unsigned slice = fwext_strtoul( l2Slice, 0, 0 );
		  DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), inject );
		  ppc_msync();
		  DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), 0 );
		  ppc_msync();
	      }

	      printf("Issued %d L2 CEs!\n", II);
	  }


	  if ( fwext_strcmp("L2CTR",testId) == 0 )  {
	      uint64_t inject = L2_COUNTER_DCR__L2_INTERRUPT_STATE__BDRY_PAR_ERR_set(1);
	      unsigned counter = fwext_strtoul( l2Slice, 0, 0 );
	      DCRWritePriv( L2_COUNTER_DCR( counter, L2_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }


	  if ( fwext_strcmp("L2Central",testId) == 0 )  {
	      uint64_t inject = L2_CENTRAL_DCR__L2_INTERRUPT_STATE__ECC_UE_set(1);
	      DCRWritePriv( L2_CENTRAL_DCR( L2_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }


	  if ( fwext_strcmp("Msgc",testId) == 0 )     {
	      uint64_t inject = MS_GENCT_DCR__L2_INTERRUPT_STATE__TIMEOUT_E_set(1);
	      DCRWritePriv( MS_GENCT_DCR( L2_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("MU",testId) == 0 )   {
	      // A fatal with simple intinfo:
	      uint64_t inject = MU_DCR__RME_INTERRUPTS0__RME_ERR7_set(1);
	      DCRWritePriv( MU_DCR( RME_INTERRUPTS0__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("MU_Correctable",testId) == 0 ) {
	      // Force a correctable error
	      uint64_t inject = MU_DCR__IMU_ECC_INTERRUPTS__IMU_ECC_CE1_set(1);
	      DCRWritePriv( MU_DCR( IMU_ECC_INTERRUPTS__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("ND",testId) == 0 ) {
	      // A fatal with simple intinfo:
	      uint64_t inject = ND_RESE_DCR__RESE_INTERRUPTS__LOCAL_RING_set(1);
	      DCRWritePriv( ND_RESE_DCR( 7, RESE_INTERRUPTS__FORCE ), inject );
	      ppc_msync();

	      //fwext_getFwInterface()->deprecated.backdoorTest(0);
	  }

	  if ( fwext_strcmp("PCIe",testId) == 0 ) {
	      ppc_msync();
	      uint64_t inject = PE_DCR__PCIE_INTERRUPT_STATE__CFG_PERR_set(1);
	      DCRWritePriv( PE_DCR( PCIE_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("SerDes0",testId) == 0 ) {
	      uint64_t inject = SERDES_LEFT_DCR__SERDES_INTERRUPT_STATE__A_PLLA_LOCK_LOST_set(1);
	      DCRWritePriv( SERDES_LEFT_DCR( SERDES_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }


	  if ( fwext_strcmp("SerDes1",testId) == 0 ) {


#if 0
	      // simulation of issue 1811
	      DCRWritePriv( SERDES_RIGHT_DCR(SERDES_INTERRUPT_STATE_CONTROL_HIGH), SERDES_RIGHT_DCR__SERDES_INTERRUPT_STATE_CONTROL_HIGH__D_PLLA_LOCK_LOST_set(2) );
	      ppc_msync();
#endif

	      uint64_t inject = SERDES_RIGHT_DCR__SERDES_INTERRUPT_STATE__D_PLLA_LOCK_LOST_set(1);
	      printf("inject->%016lX\n", inject);
	      DCRWritePriv( SERDES_RIGHT_DCR( SERDES_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

	  if ( fwext_strcmp("TestInt",testId) == 0 ) {
	      uint64_t inject = TESTINT_DCR__TI_INTERRUPT_STATE__INT_PARITY_ERROR_set(1);
	      DCRWritePriv( TESTINT_DCR( TI_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

#ifdef TESTUPC 

#define UPC_C_MMIO ((upc_c_dcr_t*)(PHYMAP_MINADDR_UPC | PHYMAP_PRIVILEGEDOFFSET))
	  {
	      UPC_C_MMIO->upc_c_interrupt_state__force =     
		  UPC_C_DCR__UPC_C_INTERRUPT_STATE__PRING_ERROR_set(1);           // An error was detected on the Processor/L2 UPC Daisy Chain 
	      ppc_msync();
	  }

#endif

#ifdef TESTWakeup
	  {
	      uint64_t inject = _DCR__CLOCKSTOP_INTERRUPT_STATE__STOPPED_set(1);
	      DCRWritePriv( _DCR( CLOCKSTOP_INTERRUPT_STATE__FORCE ), inject );
	      ppc_msync();
	  }

#endif

      }
  }
  else {


      if ( fwext_strcmp("L1PBug",testId) == 0 ) {

	  if ( ( ProcessorID() % 4 ) == 0 ) {
	      uint64_t end = GetTimeBase() + 1600ull * 1000ull * 1000ull * 10;

	      while ( GetTimeBase() < end ) {
		  uint64_t esr = in64( (uint64_t*)L1P_ESR );
		  out64_sync((void *)L1P_ESR, ~esr );
	      }

	      //printf( "shutting down ...\n");
	  }
      }
  }

  
  fwext_barrier( &barrier, N );
  fwext_barrier( &barrier, N );

  exit(0);
}
Example #20
// Deliver an interprocessor interrupt - utility function
//      processorID: target processor index. Valid values 0 through 67
//      handler: function pointer to be called
//      parm1, parm2: parameters supplied to the called function
//      Returns NULL if delivery was abandoned; if the sender targets itself
//      while a previous request is still pending, returns that pending handler.
IPIHANDLER_Fcn_t IPI_DeliverInterrupt(int processorID, IPIHANDLER_Fcn_t handler, uint64_t parm1, uint64_t parm2 )
{
    Kernel_WriteFlightLog(FLIGHTLOG, FL_DELIVRIPI, processorID, (uint64_t)handler, parm1, parm2);
    
    //printf("Sending IPI to threadid %d\n", thread_index);
    // Validate the target
    if ((processorID) >= 0 && (processorID < CONFIG_MAX_CORES*CONFIG_HWTHREADS_PER_CORE))
    {
        // Get my hardware thread state object
        HWThreadState_t *pHwt = GetHWThreadStateByProcessorID(ProcessorID());
        // Determine if there is a previous request that has not completed
        if (pHwt->ipi_message[processorID].fcn)
        {
            // If the target hwthread is the same as our hardware thread, do not deadlock.
            // instead return the function pointer of the IPI that has not been handled.
            // If the caller is sending an IPI to itself, it must be prepared to handle this situation
            if (processorID == ProcessorID())
                return (pHwt->ipi_message[processorID].fcn);

            // Lower our hwthread priority and spin waiting for the target thread to process a previous request 
            ThreadPriority_Low();
            while(pHwt->ipi_message[processorID].fcn) 
            {
                // Is the target hardware thread beyond the point of accepting interrupts
                if (pHwt->appExitPhase == AppExit_Phase2)
                {
                    // Just return. The target thread is essentially gone. 
                    // We should not be here if an IPI is being delivered from/to application or agent threads 
                    // of a job since in those conditions, the application processes and agents would not have 
                    // exited phase 1 of AppExit. We should only be here if the Tool control thread is attempting 
                    // to send an IPI to a thread that has since entered phase 2 of AppExit.
                    ThreadPriority_Medium();
                    return NULL;
                }
                // Is a process_exit message pending against this hwthread and is the kthread which is attempting this IPI delivery 
                // in the same process as the target of the exit operation? Also, are we trying to send an IPI to the same hardware 
                // thread that has sent the IPI exit message to us? If all of these are true, we need to get out of the way since the 
                // sender of the process_exit IPI will be waiting in a barrier for all of the threads of its process to arrive. 
                // Just abort this IPI since at this point there is nothing more important than allowing the exit to proceed.
                int i;
                for (i=0; i<64; i++)
                {
                    IPI_Message_t* pIPImsg = (IPI_Message_t*)&(NodeState.CoreState[i/4].HWThreads[i%4].ipi_message[ProcessorID()]);
                    if ((pIPImsg->fcn == IPI_handler_process_exit) &&  // Is there a process exit message pending to this hardware thread?
                        (GetProcessByProcessorID(ProcessorID()) == GetMyKThread()->pAppProc) && // Are we running in a kthread that is part of this hardware thread's process?
                        (i == processorID)) // Are we trying to send an IPI to the same hardware thread that has sent the IPI exit message to us?
                    {
                        // This is a situation that will not clear on its own. Toss the delivery of this IPI and allow the exit to proceed.
                        ThreadPriority_Medium();
                        return NULL;
                    }
                }
                // Is there an IPI pending directed to us which requires an ACK by the sender. If this is true, we must process the
                // pending request so that the target can proceed to an interruptable point and accept the previously pending request
                // thereby allowing us to make our request pending.
                IPI_DeadlockAvoidance(processorID);
            }
            ThreadPriority_Medium();
        }
        pHwt->ipi_message[processorID].fcn = handler;
        pHwt->ipi_message[processorID].param1 = parm1;
        pHwt->ipi_message[processorID].param2 = parm2;
        ppc_msync();
        // Write to the c2c send register
        BIC_REGISTER send_value = 0;
        send_value |= ((processorID & 0x3)+1);          // Set the thread index value (values 1 through 4)
        send_value |= ((BIC_C2C_INTTYPE_EXTERNAL) << 3); // Indicate delivery as an External interrupt
        send_value |= (_BN(processorID>>2)) >> 42;      // Set the core mask
        BIC_WriteInterruptSend(send_value);
    }
Example #21
int fw_sync_timebase( void ) 
{

    uint64_t numloops = 10;
    uint64_t value;
    uint64_t rc;
    Personality_t *pers = &FW_Personality;    
    uint64_t numthreads;
    uint64_t msr;
    uint64_t geamap8 = 0;
    
    if(!PERS_ENABLED(PERS_ENABLE_MU))
        return 0;    

    if(!PERS_ENABLED(PERS_ENABLE_ND))
        return 0;    
    
    msr = mfmsr();
    mtmsr(msr & ~(MSR_EE | MSR_CE | MSR_ME));
    isync();
    
    numthreads = popcnt64(DCRReadPriv(TESTINT_DCR(THREAD_ACTIVE0))) + popcnt64(DCRReadPriv(TESTINT_DCR(THREAD_ACTIVE1)));
    if(PhysicalThreadID() == 0)
    {
#define WU_MMIO_PRIV_BASE ((volatile unsigned long *)0x3ffe8001c00)
#define SET_THREAD(i)           ((0x300 + (i)*0x40) / sizeof (unsigned long))
        WU_MMIO_PRIV_BASE[SET_THREAD(0)] = WU_DCR__THREAD0_WU_EVENT_SET__GEA_WU_EN_set(0x8);
        
        if(ProcessorID() == 0)
        {
            // Setup classroute 14.  Identical to classroute 15.
            value = DCRReadPriv(ND_500_DCR(CTRL_GI_CLASS_14_15));
            ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS14_UP_PORT_I_insert(value, ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS15_UP_PORT_I_get(value));
            ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS14_UP_PORT_O_insert(value, ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS15_UP_PORT_O_get(value));
            DCRWritePriv(ND_500_DCR(CTRL_GI_CLASS_14_15), value);
            ppc_msync();
            
            // Initialize GI pulse
            MUSPI_GIInit (&GI, 14, 0);
            
            // Initialize the GI barrier interrupt on classroute 14
            DCRWritePriv(MU_DCR(BARRIER_INT_EN), MU_DCR__BARRIER_INT_EN__CLASS14_set(4));
            
            // Route MU MAP4 interrupt to GEA lane 12 (wakeup unit bit 0)
            geamap8 = DCRReadPriv(GEA_DCR(GEA_INTERRUPT_MAP8));
            DCRWritePriv(GEA_DCR(GEA_INTERRUPT_MAP8), GEA_DCR__GEA_INTERRUPT_MAP8__MU_MAP4_set(12));
            rc = MUSPI_GIBarrierInit(&GIBarrier, 15);
        }
        
        // do local barrier
        BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
        while(BeDRAM_Read(BeDRAM_LOCKNUM_TIMESYNC_BARRIER) != numthreads)
        {
        }
        
        if(ProcessorID() == 0)
        {
            // Perform a barrier across all nodes.
            MUSPI_GIBarrierEnterAndWait(&GIBarrier);
            if ( rc != 0 )
            {
                FW_Warning("MUSPI_GIBarrierInit for class route 15 returned rc = %ld.", rc);
                return -1;
            }
            
            // Start gsync counter (for debug)
            DCRWritePriv(TESTINT_DCR(GSYNC_CTR), -1);
        }
        doTimeSync(numloops);
        mtspr(SPRN_TENS, 0xf);
    }
    else if((ProcessorID() == 1) && (pers->Network_Config.PrimordialClassRoute.GlobIntUpPortOutputs == 0))
    {
        BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
        createSendGIPulseThread(numloops);
    }
    else
    {
        BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
        mtspr(SPRN_TENC, 1 << ProcessorThreadID());
        isync();
    }
    
    // Wait for all hwthreads on node
    BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
    while(BeDRAM_Read(BeDRAM_LOCKNUM_TIMESYNC_BARRIER) != numthreads * 2)
    {
    }
    
    if(ProcessorID() == 0)
    {
        value = DCRReadPriv(ND_500_DCR(CTRL_GI_CLASS_14_15));
        ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS14_UP_PORT_I_insert(value, 0);
        ND_500_DCR__CTRL_GI_CLASS_14_15__CLASS14_UP_PORT_O_insert(value, 0);
        DCRWritePriv(ND_500_DCR(CTRL_GI_CLASS_14_15), value);
        ppc_msync();
        
        // Initialize the barrier structure.  
        DCRWritePriv(MU_DCR(BARRIER_INT_EN), MU_DCR__BARRIER_INT_EN__CLASS14_set(0));
        DCRWritePriv(GEA_DCR(GEA_INTERRUPT_MAP8), geamap8);
    }
    WU_MMIO_PRIV_BASE[SET_THREAD(0)] = WU_DCR__THREAD0_WU_EVENT_SET__GEA_WU_EN_set(0);
    
    BeDRAM_ReadIncSat(BeDRAM_LOCKNUM_TIMESYNC_BARRIER);
    while(BeDRAM_Read(BeDRAM_LOCKNUM_TIMESYNC_BARRIER) != numthreads * 3)
    {
    }
    
    mtmsr(msr);
    isync();
    
    return 0;
}
Example #22
// Note: waiters will/must wake in FIFO order.
uint64_t Futex_Wait( uint32_t op_and_flags, KThread_t *pKThr, Futex_t* futex_vaddr, uint32_t futex_val, uint64_t timeout, uint64_t current_time )
{
    int thd_index = ProcessorID();
    KThread_t *FtQ;
    int isShared = Futex_IsShared(futex_vaddr, op_and_flags);

    // Enter Short-Term Critical Section and Grab the Atomic Operations Control Lock

    //printf("[%d] before-crit:  futex@%x = %d\n", core, futex_vaddr, *((int*)futex_vaddr));
    uint64_t my_turn = Lock_AtomicAcquire(isShared);
    //printf("[%d] after-crit:   futex@%x = %d\n", core, futex_vaddr, *((int*)futex_vaddr));

    //_FutexRecord( _FUTHIST_WAIT, futex_vaddr, futex_val, *((uint32_t*)futex_vaddr) );



    if ( *((uint32_t*)futex_vaddr) != futex_val )
    {
        TRACE( TRACE_Futex, ("(I) Futex_Wait[%d]: EWOULDBLOCK address 0x%016lx futex_val %016lx, *futex_vaddr %d\n",
                               thd_index, (uint64_t)futex_vaddr, (uint64_t)futex_val, *((uint32_t*)futex_vaddr) ));
        Lock_AtomicRelease(isShared, my_turn);

        return CNK_RC_FAILURE(EWOULDBLOCK);
    }

    TRACE( TRACE_Futex, ("(I) Futex_Wait[%d]: WAITING address 0x%016lx (kthread=0x%p) time=0x%016lx\n", 
                         thd_index, (uint64_t)futex_vaddr, pKThr, current_time));

    Futex_State_t* futexTableEntry = Futex_findTableEntry(futex_vaddr, isShared, 1);

    if (futexTableEntry)
    {
        if (futexTableEntry->pKThr_Waiter_Next)
        {
            for (FtQ = futexTableEntry->pKThr_Waiter_Next; FtQ->FutexQueueNext; FtQ = FtQ->FutexQueueNext)
            {
            }
            FtQ->FutexQueueNext = pKThr;
        }
        else
        {
            futexTableEntry->pKThr_Waiter_Next = pKThr;
        }

        pKThr->FutexQueueNext = (KThread_t *)0;
        pKThr->FutexVAddr     = futex_vaddr;
        pKThr->FutexValue     = *futex_vaddr;
        pKThr->FutexTimeout   = timeout;
        pKThr->FutexIsShared  = isShared;
        ppc_msync();
        // If this is a timed futex, set up the udecr timer.
        if (timeout)
        {
            if (timeout < current_time)
            {
                timeout = current_time+1;
            }
            Timer_enableFutexTimeout(current_time, timeout);
        }
    }
    else
    {
        TRACE( TRACE_Futex, ("(I) Futex_Wait[%d]: futexTableEntry == NULL\n", thd_index));
    }

    Sched_Block(GetMyKThread(), SCHED_STATE_FUTEX );
    Lock_AtomicRelease(isShared, my_turn);
    // TRACE( TRACE_Futex, ("(<) %s[%d]\n", __func__, core));

    // The thread's non-volatile state has not been saved yet.  Set a Pending
    // bit, which will result in a full state save and a call to Scheduler().
    pKThr->Pending |= KTHR_PENDING_YIELD;

    return CNK_RC_SUCCESS(0);
}
Example #23
static int Futex_WakeQueue(Futex_State_t* futexTableEntry, int maxToWake, Futex_State_t* secondaryFutexTableEntry, uint16_t *unblockList)
{

    int numberAwoken = 0;
    int thd_index = ProcessorID();

    TRACE( TRACE_Futex, ("(I) %s[%d]: table:%p (vaddr:%p) num2wake:%d secondary:%p\n",
                           __func__, thd_index,
                           futexTableEntry, (futexTableEntry ? futexTableEntry->futex_vaddr : 0LL),
                           maxToWake,
                           secondaryFutexTableEntry));

    if (!futexTableEntry)
    {
        TRACE( TRACE_Futex, ("(I) %s[%d]: futexTableEntry == NULL\n",  __func__, thd_index));
        return numberAwoken;
    }

    KThread_t* thread;

    for (thread = futexTableEntry->pKThr_Waiter_Next; thread; thread = futexTableEntry->pKThr_Waiter_Next)
    {

        // TRACE( TRACE_Futex, ("(D) %s[%d]: process thread:%08x (next->%08x\n", __func__, core, (unsigned)thread, (unsigned)(thread->FutexQueueNext)));

        // If we haven't hit the limit, wake up the thread:
        if ( numberAwoken < maxToWake )
        {
            // Advance the futex table to the next entry now because we will be destroying the link
            // in the current entry:
            futexTableEntry->pKThr_Waiter_Next = thread->FutexQueueNext;
            TRACE( TRACE_Futex, ("(I) %s[%d]: waking kthread:%p processorid:%d for futex:%p\n", __func__, thd_index, thread, thread->ProcessorID, thread->FutexVAddr));

            thread->Reg_State.gpr[3] = 0; // this is the result of the futex syscall
            thread->FutexQueueNext   = (KThread_t *)0;
            thread->FutexVAddr       = NULL;
            thread->FutexValue       = 0;
            thread->FutexTimeout     = 0;
            thread->FutexIsShared    = 0;
            //thread->pad3 = 1; // TEMP PROBLEM ANALYSIS
	    unblockList[numberAwoken] = GetTID(thread); // save the kthread in an abbreviated 2-byte format.
            numberAwoken++;
            ppc_msync();
        }
        else if ( secondaryFutexTableEntry )
        {
            // We are requeueing ... move the entire FIFO to the secondary
            // futex queue.  Since the FIFO is a linked list, we can do
            // this by simply snipping off the remaining chain and pasting
            // it onto the secondary:

            KThread_t** tail;

            for (tail = &(secondaryFutexTableEntry->pKThr_Waiter_Next); *tail; tail = &((*tail)->FutexQueueNext)); // find the end of the secondary queue

            futexTableEntry->pKThr_Waiter_Next = 0; // The old queue is now empty
            *tail = thread;                            // Paste the queue remainder onto the end of the existing queue

            // Now update the entries
            for ( ; thread; thread = thread->FutexQueueNext )
            {
                thread->FutexVAddr = secondaryFutexTableEntry->futex_vaddr;
                thread->FutexValue = *thread->FutexVAddr;
                ppc_msync();
            }

            TRACE( TRACE_Futex, ("(I) %s[%d]: some waiters requeued to futex:%016lx (sys:%p) \n",
                                   __func__, thd_index, (uint64_t)secondaryFutexTableEntry->futex_vaddr, secondaryFutexTableEntry ));
        }
        else
        {
            break;
        }
    }

    // If the waiter list is now empty, then remove the entry from the table:
    if ( futexTableEntry->pKThr_Waiter_Next == NULL )
    {
        futexTableEntry->futex_vaddr = 0;
        ppc_msync();
    }
    if ( secondaryFutexTableEntry && (secondaryFutexTableEntry->pKThr_Waiter_Next == NULL) )
    {
        secondaryFutexTableEntry->futex_vaddr = 0;
        ppc_msync();
    }

    // TRACE( TRACE_Futex, ("(<) %s[%d]\n", __func__, core));
    return numberAwoken;
}