int l1p_init() { // Restore A2 hardware thread priority ThreadPriority_Medium(); if(PhysicalThreadID() != 0) return 0; // Restore prefetcher state CoreState_t* cs = GetCoreStateByCore(PhysicalProcessorID()); if(cs->default_l1p_init) { out64((void *)L1P_CFG_SPEC, cs->default_l1p_cfgspec); out64((void *)L1P_CFG_PF_USR, cs->default_l1p_cfgpfusr); out64((void *)L1P_CFG_PF_SYS, cs->default_l1p_cfgpfsys); ppc_msync(); } else { ppc_msync(); cs->default_l1p_cfgspec = in64((void*)L1P_CFG_SPEC); cs->default_l1p_cfgpfusr = in64((void*)L1P_CFG_PF_USR); cs->default_l1p_cfgpfsys = in64((void*)L1P_CFG_PF_SYS); cs->default_l1p_init = 1; ppc_msync(); } return 0; }
static int xicor_read(uint8_t addr) { while (in64(SMB_CSR(R_SMB_STATUS)) & M_SMB_BUSY) ; out64((addr >> 8) & 0x7, SMB_CSR(R_SMB_CMD)); out64((addr & 0xff), SMB_CSR(R_SMB_DATA)); out64((V_SMB_ADDR(X1241_CCR_ADDRESS) | V_SMB_TT_WR2BYTE), SMB_CSR(R_SMB_START)); while (in64(SMB_CSR(R_SMB_STATUS)) & M_SMB_BUSY) ; out64((V_SMB_ADDR(X1241_CCR_ADDRESS) | V_SMB_TT_RD1BYTE), SMB_CSR(R_SMB_START)); while (in64(SMB_CSR(R_SMB_STATUS)) & M_SMB_BUSY) ; if (in64(SMB_CSR(R_SMB_STATUS)) & M_SMB_ERROR) { /* Clear error bit by writing a 1 */ out64(M_SMB_ERROR, SMB_CSR(R_SMB_STATUS)); return -1; } return (in64(SMB_CSR(R_SMB_DATA)) & 0xff); }
static void check_bus_watcher(void) { uint32_t status, l2_err, memio_err; #ifdef DUMP_L2_ECC_TAG_ON_ERROR uint64_t l2_tag; #endif /* Destructive read, clears register and interrupt */ status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); /* Bit 31 is always on, but there's no #define for that */ if (status & ~(1UL << 31)) { l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); #ifdef DUMP_L2_ECC_TAG_ON_ERROR l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG); #endif memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); prom_printf("\nLast recorded signature:\n"); prom_printf("Request %02x from %d, answered by %d with Dcode %d\n", (unsigned int)(G_SCD_BERR_TID(status) & 0x3f), (int)(G_SCD_BERR_TID(status) >> 6), (int)G_SCD_BERR_RID(status), (int)G_SCD_BERR_DCODE(status)); #ifdef DUMP_L2_ECC_TAG_ON_ERROR prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag); #endif } else {
/*
 * SB1250-specific replacement for the generic do_gettimeoffset(): the
 * generic version does not work on SMP.  System time is driven by general
 * timer 0, so the offset inside the current jiffy can be derived directly
 * from that timer's count register, with no calibration step.
 *
 * Returns (1000000 / HZ) minus the value currently read from the timer 0
 * count register.
 * NOTE(review): this presumably assumes the register counts down in
 * microseconds from the tick period - confirm against the timer setup code.
 */
unsigned long sb1250_gettimeoffset(void)
{
	const unsigned long tick_period = 1000000/HZ;
	unsigned long timer_count;

	timer_count = in64(KSEG1 + A_SCD_TIMER_REGISTER(0, R_SCD_TIMER_CNT));

	return tick_period - timer_count;
}
/*
 * Read a 64-bit L1P register at `address`, retrying while the local core's
 * L1P ESR reports a DCR abort (L1P_ESR_err_lu_dcr_abort).
 *
 * After each read the ESR of the executing core is sampled.  If the abort
 * bit is clear the value just read is returned.  Otherwise the abort bit is
 * written back to the ESR and the read is attempted again, for at most 999
 * attempts in total.
 *
 * If every attempt aborts, FW_Error() is raised with the address and the
 * last ESR value, and all-ones ((uint64_t)-1) is returned.
 */
uint64_t fw_l1p_readCrossCore( uint64_t address )
{
  uint64_t value = 0;
  uint64_t esr = 0;
  int attemptsLeft;

  for ( attemptsLeft = 999; attemptsLeft > 0; attemptsLeft-- ) {

    value = in64( (void *) address );
    esr = in64( (void*)L1P_ESR_DCR(ProcessorCoreID() ) );

    if ( esr & L1P_ESR_err_lu_dcr_abort ) {
      /* The access aborted: write the abort bit back and go around again. */
      out64_sync( (void*)L1P_ESR_DCR(ProcessorCoreID()), L1P_ESR_err_lu_dcr_abort );
      continue;
    }

    return value;
  }

  FW_Error( "Could not perform cross-core L1P register read addr=%X esr=%X.", address, esr );
  return -1;
}
static int xicor_write(uint8_t addr, int b) { while (in64(SMB_CSR(R_SMB_STATUS)) & M_SMB_BUSY) ; out64(addr, SMB_CSR(R_SMB_CMD)); out64((addr & 0xff) | ((b & 0xff) << 8), SMB_CSR(R_SMB_DATA)); out64(V_SMB_ADDR(X1241_CCR_ADDRESS) | V_SMB_TT_WR3BYTE, SMB_CSR(R_SMB_START)); while (in64(SMB_CSR(R_SMB_STATUS)) & M_SMB_BUSY) ; if (in64(SMB_CSR(R_SMB_STATUS)) & M_SMB_ERROR) { /* Clear error bit by writing a 1 */ out64(M_SMB_ERROR, SMB_CSR(R_SMB_STATUS)); return -1; } else { return 0; } }
/*
 * Set the si_ecc, reload_ecc_x2 and sda_p error bits in the L1P_ESR_GEA
 * register, leaving all other bits as they were.
 *
 * Does nothing unless the PERS_ENABLE_TakeCPU personality bit is enabled.
 */
void fw_l1p_unmaskCorrectableErrors()
{
  if ( ! PERS_ENABLED(PERS_ENABLE_TakeCPU) ) {
    return;
  }

  const uint64_t correctableBits =
    L1P_ESR_err_si_ecc        |
    L1P_ESR_err_reload_ecc_x2 |
    L1P_ESR_err_sda_p         ;

  /* Read-modify-write so bits that are already set are preserved. */
  uint64_t gea = in64( (void *)L1P_ESR_GEA );
  out64_sync( (void *)L1P_ESR_GEA, gea | correctableBits );
}
/*
 * Write `value` to the 64-bit L1P register at `address`, retrying while the
 * local core's L1P ESR reports a DCR abort (L1P_ESR_err_lu_dcr_abort).
 *
 * After each write the ESR of the executing core is sampled.  If the abort
 * bit is clear the function returns.  Otherwise the abort bit is written
 * back to the ESR and the write is attempted again, for at most 999
 * attempts in total.
 *
 * If every attempt aborts, FW_Error() is raised with the address, the value
 * and the last ESR contents.
 */
void fw_l1p_writeCrossCore( uint64_t address, uint64_t value )
{
  uint64_t esr = 0;
  int attemptsLeft;

  for ( attemptsLeft = 999; attemptsLeft > 0; attemptsLeft-- ) {

    out64_sync( (void *) address, value );
    esr = in64( (void*)L1P_ESR_DCR(ProcessorCoreID() ) );

    if ( esr & L1P_ESR_err_lu_dcr_abort ) {
      /* The access aborted: write the abort bit back and go around again. */
      out64_sync( (void*)L1P_ESR_DCR(ProcessorCoreID()), L1P_ESR_err_lu_dcr_abort );
      continue;
    }

    return;
  }

  FW_Error( "Could not perform cross-core L1P register write addr=%X value=%X esr=%X.", address, value, esr );
}
int main(int argc, char * argv[]) { num_threads = (argc>1) ? atoi(argv[1]) : 1; printf("L2 counter test using %d threads \n", num_threads ); //printf("sizeof(BGQ_Atomic64_t) = %zu \n", sizeof(BGQ_Atomic64_t) ); /* this "activates" the L2 atomic data structures */ Kernel_L2AtomicsAllocate(&counter, sizeof(BGQ_Atomic64_t) ); L2_AtomicStore(&(counter.atom), 0); out64_sync(&(counter.atom), 0); pool = (pthread_t *) malloc( num_threads * sizeof(pthread_t) ); assert(pool!=NULL); /**************************************************/ for (int i=0; i<num_threads; i++) { int rc = pthread_create(&(pool[i]), NULL, &fight, NULL); if (rc!=0) { printf("pthread error \n"); fflush(stdout); sleep(1); } assert(rc==0); } if (debug) { printf("threads created \n"); fflush(stdout); } for (int i=0; i<num_threads; i++) { void * junk; int rc = pthread_join(pool[i], &junk); if (rc!=0) { printf("pthread error \n"); fflush(stdout); sleep(1); } assert(rc==0); } if (debug) { printf("threads joined \n"); fflush(stdout); } uint64_t rval = L2_AtomicLoad(&(counter.atom)); printf("final value of counter is %llu \n", rval); /**************************************************/ for (int i=0; i<num_threads; i++) { int rc = pthread_create(&(pool[i]), NULL, &slowfight, NULL); if (rc!=0) { printf("pthread error \n"); fflush(stdout); sleep(1); } assert(rc==0); } printf("threads created \n"); fflush(stdout); for (int i=0; i<num_threads; i++) { void * junk; int rc = pthread_join(pool[i], &junk); if (rc!=0) { printf("pthread error \n"); fflush(stdout); sleep(1); } assert(rc==0); } printf("threads joined \n"); fflush(stdout); rval = in64(&(slowcounter.atom)); printf("final value of slowcounter is %llu \n", rval); /**************************************************/ free(pool); return 0; }
int fw_l1p_machineCheckHandler( uint64_t mappedStatus[] ) { uint64_t core, pass; fw_uint64_t details[17*2*6]; unsigned n = 0; int rc = 0; // unless set otherwise below uint64_t correctableMask = FW_L1P_CORRECTABLE_MASK; // +--------------------------------------------------------------------+ // | HW Issue 1596 workaround: Disable DCR Arbiter Timeouts | // +--------------------------------------------------------------------+ uint64_t dcr_arbiter_ctrl_low = DCRReadPriv(DC_ARBITER_DCR( INT_REG_CONTROL_LOW)); DCRWritePriv( DC_ARBITER_DCR( INT_REG_CONTROL_LOW), dcr_arbiter_ctrl_low & ~DC_ARBITER_DCR__INT_REG_CONTROL_LOW__NO_ACK_AFTER_REQ_set(-1) ); for ( pass = 0; pass < 2; pass++ ) { n = 0; uint64_t statusMask = GEA_DCR__GEA_MAPPED_INTERRUPT_STATUS0_0__L1P0_RT_INT_set(1); for (core = 0; core < 17; core++, statusMask >>= 1 ) { // +---------------------------------------------------------------------------------------+ // | HW Issue 1596 : Only perform a cross-core ESR access if the mapped status indicates | // | that it is interesting to do so. This reduces the exposure to the | // | hardware bug documented in that issue. | // +---------------------------------------------------------------------------------------+ if ( ( mappedStatus[0] & statusMask ) != 0 ) { uint64_t status, error; status = fw_l1p_readCrossCore( L1P_ESR_DCR(core) ); status &= ( ( pass == 0 ) ? correctableMask : ~correctableMask ); if ( status == 0 ) continue; if ( pass == 0 ) { fw_l1p_handleCorrectable( status, core ); // +------------------------------------------------------------------------------------------+ // | This funky little sequence seems to be required to clean up the A2 machine check bit | // | (bit 0) of the ESR. 
It came from Krishnan and does the following: | // | | // | o masks interrupts | // | o clears the MCSR[EXT] bit | // | o clears the L1P_ESR[0] bit | // | o re-enables interrupts | // | | // +------------------------------------------------------------------------------------------+ uint64_t thisCore = ProcessorCoreID(); uint64_t esrMask = in64( (void*)L1P_ESR_GEA_DCR(thisCore)); out64_sync( (void*)L1P_ESR_GEA_DCR(thisCore), 0 ); mtspr( SPRN_MCSR, mfspr(SPRN_MCSR) & ~MCSR_EXT ); out64_sync( (void*)L1P_ESR_DCR(thisCore), L1P_ESR_a2_machine_check ); out64_sync( (void*)L1P_ESR_GEA_DCR(thisCore), esrMask ); } else { details[n++] = L1P_ESR_DCR(core); details[n++] = status; status = L1P_DCR_PRIV_PTR(core)->interrupt_state_a__machine_check; error = L1P_DCR_PRIV_PTR(core)->interrupt_internal_error__machine_check; if ( status != 0 ) { details[n++] = L1P_DCR( core,INTERRUPT_STATE_A__MACHINE_CHECK); details[n++] = status; } if ( error != 0 ) { details[n++] = L1P_DCR(core,INTERRUPT_INTERNAL_ERROR_CONTROL_HIGH); details[n++] = error; } rc = -1; } } } if ( n > 0 ) { fw_machineCheckRas( FW_RAS_L1P_MACHINE_CHECK, details, n, __FILE__, __LINE__ ); } } // +--------------------------------------------------------------------------+ // | HW Issue 1596 workaround: Force-clear any DCR timeouts and restore the | // | control register. | // +--------------------------------------------------------------------------+ DCRWritePriv( DC_ARBITER_DCR( INT_REG__STATE ), DC_ARBITER_DCR__INT_REG__NO_ACK_AFTER_REQ_set(1) ); ppc_msync(); DCRWritePriv( DC_ARBITER_DCR( INT_REG_CONTROL_LOW), dcr_arbiter_ctrl_low ); return rc; }
int test_main( void ) { int N = BgGetNumThreads(); int irritatorThread = 1; int iterations = 1; char* irritator = fwext_getenv("IRRITATOR"); char* testId = fwext_getenv("TEST"); char* itersStr = fwext_getenv("ITERATIONS"); if ( irritator != 0 ) { irritatorThread = fwext_strtoul( irritator, 0, 0 ); } if ( itersStr != 0 ) { iterations = fwext_strtoul( itersStr, 0, 0 ); } fwext_barrier( &barrier, N ); if ( ProcessorID() == irritatorThread ) { char* l2Slice = fwext_getenv("L2SLICE"); char* mc = fwext_getenv("MC"); int mci = 0; if ( mc != 0 ) { mci = fwext_strtoul( mc, 0, 0 ); } int i; for ( i = 0; i < iterations; i++ ) { if ( i > 0 ) { fwext_udelay( 250 * 1000 ); } printf("Test: %s L2Slice:%s MC:%s NumThreads:%d Iter:%d\n", testId, l2Slice, mc ? mc : "?", N, i ); if ( fwext_strcmp("BeDRAM",testId) == 0 ) { printf("Injecting ...\n"); uint64_t inject = BEDRAM_DCR__BEDRAM_INTERRUPT_STATUS__BEDRAM_EDRAM_ECC_set(1); DCRWritePriv( BEDRAM_DCR( BEDRAM_INTERRUPT_STATUS__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("ClkStopUnit",testId) == 0 ) { uint64_t inject = CS_DCR__CLOCKSTOP_INTERRUPT_STATE__STOPPED_set(1); DCRWritePriv( CS_DCR( CLOCKSTOP_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("DrArbiter",testId) == 0 ) { uint64_t inject = DC_ARBITER_DCR__INT_REG__RING_NOT_CLEAN_set(1); DCRWritePriv( DC_ARBITER_DCR( INT_REG__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("DDR",testId) == 0 ) { DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_READ_BUFFER_SUE ); uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__XSTOP_set(1); DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("DDR_MARKING_STORE",testId) == 0 ) { DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_MEMORY_ECC_MARKING_STORE_UPDATED ); uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1); DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("DDR_Correctable",testId) == 0 ) { 
//DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_WRITE_BUFFER_CE); //DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_MEMORY_CE ); DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_POWERBUS_READ_BUFFER_SUE ); //uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1); //DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("DDR_Threshold",testId) == 0 ) { DCRWritePriv( _DDR_MC_MCFIRS(mci), MCFIR_ECC_ERROR_COUNTER_THRESHOLD_REACHED ); uint64_t inject = DR_ARB_DCR__L2_INTERRUPT_STATE__RECOV_ERR_set(1); DCRWritePriv( DR_ARB_DCR(mci, L2_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("DevBus",testId) == 0 ) { uint64_t inject = DEVBUS_DCR__DB_INTERRUPT_STATE__SLAVE_FIFO_PARITY_set(1); DCRWritePriv( DEVBUS_DCR( DB_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("EnvMon",testId) == 0 ) { uint64_t inject = EN_DCR__ENVMON_INTERRUPT_STATE__FSM_CHECKSUM_FAIL_set(1); DCRWritePriv( EN_DCR( ENVMON_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("GEA",testId) == 0 ) { uint64_t inject = GEA_DCR__GEA_INTERRUPT_STATE__DEVBUS_CTL_PERR_set(1); DCRWritePriv( GEA_DCR( GEA_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } #if 0 { uint64_t inject = L1P_DCR__INTERRUPT_INTERNAL_ERROR__BAD_DATA_PARITY_set(1); DCRWritePriv( L1P_DCR(0, INTERRUPT_INTERNAL_ERROR__FORCE ), inject ); ppc_msync(); } #endif if ( fwext_strcmp("L1P",testId) == 0 ) { //printf("Irritating L1 on core %d ...\n", irritatorThread); #if 0 BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(0), BIC_MACHINE_CHECK ); BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(1), BIC_MACHINE_CHECK ); BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(2), BIC_MACHINE_CHECK ); BIC_InsertInterruptMap( 0, BIC_MAP_L1P_LANE(3), BIC_MACHINE_CHECK ); #endif int64_t NN ; printf("INJECTING!\n"); for ( NN = 0; NN < 10000000ull; NN++ ) { uint64_t inject = L1P_ESR_err_reload_ecc_x2 | //L1P_ESR_err_si_ecc | //L1P_ESR_err_reload_ecc_ue_x2 | 0 ; out64_sync((void 
*)L1P_ESR_INJ_DCR(ProcessorCoreID()), inject ); ppc_msync(); out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), 0 ); ppc_msync(); } printf( "*********************************** Injected %ld CEs into L1P ***********************************************\n", NN); //printf("L1P_ESR --> %lx\n", in64( (uint64_t*)L1P_ESR)); } if ( fwext_strcmp("L1PBug",testId) == 0 ) { printf("Waiting for 2 seconds ...\n"); uint64_t end = GetTimeBase() + 1600ull * 1000ull * 1000ull * 2; while ( GetTimeBase() < end ); printf("Irritating L1 on core %d ...\n", irritatorThread); uint64_t inject = L1P_ESR_err_reload_ecc_x2 | //L1P_ESR_err_si_ecc | //L1P_ESR_err_reload_ecc_ue_x2 | 0 ; printf("injecting %lx\n", inject); out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), inject ); ppc_msync(); out64_sync((void *)L1P_ESR_INJ_DCR(ProcessorCoreID()), 0 ); ppc_msync(); } if ( fwext_strcmp("L2",testId) == 0 ) { uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__DIRB_UE_set(1); unsigned slice = fwext_strtoul( l2Slice, 0, 0 ); DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("L2_Correctable",testId) == 0 ) { int II; for ( II=0; II < 111101; II++ ) { //uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__DIRB_CE_set(1); uint64_t inject = L2_DCR__L2_INTERRUPT_STATE__EDR_CE_set(1); unsigned slice = fwext_strtoul( l2Slice, 0, 0 ); DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); DCRWritePriv( L2_DCR( slice, L2_INTERRUPT_STATE__FORCE ), 0 ); ppc_msync(); } printf("Issued %d L2 CEs!\n", II); } if ( fwext_strcmp("L2CTR",testId) == 0 ) { uint64_t inject = L2_COUNTER_DCR__L2_INTERRUPT_STATE__BDRY_PAR_ERR_set(1); unsigned counter = fwext_strtoul( l2Slice, 0, 0 ); DCRWritePriv( L2_COUNTER_DCR( counter, L2_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("L2Central",testId) == 0 ) { uint64_t inject = L2_CENTRAL_DCR__L2_INTERRUPT_STATE__ECC_UE_set(1); DCRWritePriv( L2_CENTRAL_DCR( L2_INTERRUPT_STATE__FORCE ), inject ); 
ppc_msync(); } if ( fwext_strcmp("Msgc",testId) == 0 ) { uint64_t inject = MS_GENCT_DCR__L2_INTERRUPT_STATE__TIMEOUT_E_set(1); DCRWritePriv( MS_GENCT_DCR( L2_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("MU",testId) == 0 ) { // A fatal with simple intinfo: uint64_t inject = MU_DCR__RME_INTERRUPTS0__RME_ERR7_set(1); DCRWritePriv( MU_DCR( RME_INTERRUPTS0__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("MU_Correctable",testId) == 0 ) { // Force a correctable error uint64_t inject = MU_DCR__IMU_ECC_INTERRUPTS__IMU_ECC_CE1_set(1); DCRWritePriv( MU_DCR( IMU_ECC_INTERRUPTS__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("ND",testId) == 0 ) { // A fatal with simple intinfo: uint64_t inject = ND_RESE_DCR__RESE_INTERRUPTS__LOCAL_RING_set(1); DCRWritePriv( ND_RESE_DCR( 7, RESE_INTERRUPTS__FORCE ), inject ); ppc_msync(); //fwext_getFwInterface()->deprecated.backdoorTest(0); } if ( fwext_strcmp("PCIe",testId) == 0 ) { ppc_msync(); uint64_t inject = PE_DCR__PCIE_INTERRUPT_STATE__CFG_PERR_set(1); DCRWritePriv( PE_DCR( PCIE_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("SerDes0",testId) == 0 ) { uint64_t inject = SERDES_LEFT_DCR__SERDES_INTERRUPT_STATE__A_PLLA_LOCK_LOST_set(1); DCRWritePriv( SERDES_LEFT_DCR( SERDES_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("SerDes1",testId) == 0 ) { #if 0 // simulation of issue 1811 DCRWritePriv( SERDES_RIGHT_DCR(SERDES_INTERRUPT_STATE_CONTROL_HIGH), SERDES_RIGHT_DCR__SERDES_INTERRUPT_STATE_CONTROL_HIGH__D_PLLA_LOCK_LOST_set(2) ); ppc_msync(); #endif uint64_t inject = SERDES_RIGHT_DCR__SERDES_INTERRUPT_STATE__D_PLLA_LOCK_LOST_set(1); printf("inject->%016lX\n", inject); DCRWritePriv( SERDES_RIGHT_DCR( SERDES_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } if ( fwext_strcmp("TestInt",testId) == 0 ) { uint64_t inject = TESTINT_DCR__TI_INTERRUPT_STATE__INT_PARITY_ERROR_set(1); DCRWritePriv( TESTINT_DCR( TI_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } #ifdef 
TESTUPC #define UPC_C_MMIO ((upc_c_dcr_t*)(PHYMAP_MINADDR_UPC | PHYMAP_PRIVILEGEDOFFSET)) { UPC_C_MMIO->upc_c_interrupt_state__force = UPC_C_DCR__UPC_C_INTERRUPT_STATE__PRING_ERROR_set(1); // An error was detected on the Processor/L2 UPC Daisy Chain ppc_msync(); } #endif #ifdef TESTWakeup { uint64_t inject = _DCR__CLOCKSTOP_INTERRUPT_STATE__STOPPED_set(1); DCRWritePriv( _DCR( CLOCKSTOP_INTERRUPT_STATE__FORCE ), inject ); ppc_msync(); } #endif } } else { if ( fwext_strcmp("L1PBug",testId) == 0 ) { if ( ( ProcessorID() % 4 ) == 0 ) { uint64_t end = GetTimeBase() + 1600ull * 1000ull * 1000ull * 10; while ( GetTimeBase() < end ) { uint64_t esr = in64( (uint64_t*)L1P_ESR ); out64_sync((void *)L1P_ESR, ~esr ); } //printf( "shutting down ...\n"); } } } fwext_barrier( &barrier, N ); fwext_barrier( &barrier, N ); exit(0); }
// Function Specification // // Name: task_check_for_checkstop // // Description: Check for checkstop // // End Function Specification void task_check_for_checkstop(task_t *i_self) { pore_status_t l_gpe0_status; ocb_oisr0_t l_oisr0_status; static bool L_checkstop_traced = FALSE; uint8_t l_reason_code = 0; do { // This check is disabled once a checkstop or frozen GPE is detected if(L_checkstop_traced) { break; } // Looked for a frozen GPE, a sign that the chip has stopped working or // check-stopped. This check also looks for an interrupt status flag that // indicates if the system has check-stopped. l_gpe0_status.value = in64(PORE_GPE0_STATUS); l_oisr0_status.value = in32(OCB_OISR0); if (l_gpe0_status.fields.freeze_action || l_oisr0_status.fields.check_stop) { errlHndl_t l_err = NULL; if (l_gpe0_status.fields.freeze_action) { TRAC_IMP("Frozen GPE0 detected by RTL"); l_reason_code = OCC_GPE_HALTED; } if (l_oisr0_status.fields.check_stop) { TRAC_IMP("System checkstop detected by RTL"); l_reason_code = OCC_SYSTEM_HALTED; } L_checkstop_traced = TRUE; /* * @errortype * @moduleid MAIN_SYSTEM_HALTED_MID * @reasoncode OCC_GPE_HALTED * @userdata1 High order word of PORE_GPE0_STATUS * @userdata2 OCB_OISR0 * @devdesc OCC detected frozen GPE0 */ /* * @errortype * @moduleid MAIN_SYSTEM_HALTED_MID * @reasoncode OCC_SYSTEM_HALTED * @userdata1 High order word of PORE_GPE0_STATUS * @userdata2 OCB_OISR0 * @devdesc OCC detected system checkstop */ l_err = createErrl(MAIN_SYSTEM_HALTED_MID, l_reason_code, OCC_NO_EXTENDED_RC, ERRL_SEV_INFORMATIONAL, NULL, DEFAULT_TRACE_SIZE, l_gpe0_status.words.high_order, l_oisr0_status.value); // The commit code will check for the frozen GPE0 and system // checkstop conditions and take appropriate actions. commitErrl(&l_err); } } while(0); }
// Function Specification // // Name: dcom_initialize_roles // // Description: Initialize roles so we know if we are master or slave // // End Function Specification void dcom_initialize_roles(void) { G_occ_role = OCC_SLAVE; // Locals pba_xcfg_t pbax_cfg_reg; // Used as a debug tool to correlate time between OCCs & System Time // getscom_ffdc(OCB_OTBR, &G_dcomTime.tod, NULL); // Commits errors internally G_dcomTime.tod = in64(OCB_OTBR) >> 4; G_dcomTime.base = ssx_timebase_get(); pbax_cfg_reg.value = in64(PBA_XCFG); if(pbax_cfg_reg.fields.rcv_groupid < MAX_NUM_NODES && pbax_cfg_reg.fields.rcv_chipid < MAX_NUM_OCC) { TRAC_IMP("Proc ChipId (%d) NodeId (%d)", pbax_cfg_reg.fields.rcv_chipid, pbax_cfg_reg.fields.rcv_groupid); G_pbax_id.valid = 1; G_pbax_id.node_id = pbax_cfg_reg.fields.rcv_groupid; G_pbax_id.chip_id = pbax_cfg_reg.fields.rcv_chipid; G_pbax_id.module_id = G_pbax_id.chip_id; // Always start as OCC Slave G_occ_role = OCC_SLAVE; rtl_set_run_mask(RTL_FLAG_NOTMSTR); // Set the initial presence mask, and count the number of occ's present G_sysConfigData.is_occ_present |= (0x01 << G_pbax_id.chip_id); G_occ_num_present = __builtin_popcount(G_sysConfigData.is_occ_present); } else // Invalid chip/node ID(s) { TRAC_ERR("Proc ChipId (%d) and/or NodeId (%d) too high: request reset", pbax_cfg_reg.fields.rcv_chipid, pbax_cfg_reg.fields.rcv_groupid); /* @ * @errortype * @moduleid DCOM_MID_INIT_ROLES * @reasoncode INVALID_CONFIG_DATA * @userdata1 PBAXCFG (upper) * @userdata2 PBAXCFG (lower) * @userdata4 ERC_CHIP_IDS_INVALID * @devdesc Failure determining OCC role */ errlHndl_t l_errl = createErrl( DCOM_MID_INIT_ROLES, //ModId INVALID_CONFIG_DATA, //Reasoncode ERC_CHIP_IDS_INVALID, //Extended reasoncode ERRL_SEV_UNRECOVERABLE, //Severity NULL, //Trace Buf DEFAULT_TRACE_SIZE, //Trace Size pbax_cfg_reg.words.high_order, //Userdata1 pbax_cfg_reg.words.low_order //Userdata2 ); // Callout firmware addCalloutToErrl(l_errl, ERRL_CALLOUT_TYPE_COMPONENT_ID, 
ERRL_COMPONENT_ID_FIRMWARE, ERRL_CALLOUT_PRIORITY_HIGH); //Add processor callout addCalloutToErrl(l_errl, ERRL_CALLOUT_TYPE_HUID, G_sysConfigData.proc_huid, ERRL_CALLOUT_PRIORITY_LOW); G_pbax_id.valid = 0; // Invalid Chip/Node ID } // Initialize DCOM Thread Sem ssx_semaphore_create( &G_dcomThreadWakeupSem, // Semaphore 1, // Initial Count 0); // No Max Count }
void amec_update_fw_sensors(void) { errlHndl_t l_err = NULL; int rc = 0; int rc2 = 0; static bool l_first_call = TRUE; bool l_gpe0_idle, l_gpe1_idle; static int L_consec_trace_count = 0; // ------------------------------------------------------ // Update OCC Firmware Sensors from last tick // ------------------------------------------------------ int l_last_state = G_fw_timing.amess_state; // RTLtickdur = duration of last tick's RTL ISR (max = 250us) sensor_update( AMECSENSOR_PTR(RTLtickdur), G_fw_timing.rtl_dur); // AMEintdur = duration of last tick's AMEC portion of RTL ISR sensor_update( AMECSENSOR_PTR(AMEintdur), G_fw_timing.ameint_dur); // AMESSdurX = duration of last tick's AMEC state if(l_last_state >= NUM_AMEC_SMH_STATES) { // Sanity check. Trace this out, even though it should never happen. TRAC_INFO("AMEC State Invalid, Sensor Not Updated"); } else { // AMESSdurX = duration of last tick's AMEC state sensor_update( AMECSENSOR_ARRAY_PTR(AMESSdur0, l_last_state), G_fw_timing.amess_dur); } // ------------------------------------------------------ // Kick off GPE programs to track WorstCase time in GPE // and update the sensors. // ------------------------------------------------------ if( (NULL != G_fw_timing.gpe0_timing_request) && (NULL != G_fw_timing.gpe1_timing_request) ) { //Check if both GPE engines were able to complete the last GPE job on //the queue within 1 tick. l_gpe0_idle = async_request_is_idle(&G_fw_timing.gpe0_timing_request->request); l_gpe1_idle = async_request_is_idle(&G_fw_timing.gpe1_timing_request->request); if(l_gpe0_idle && l_gpe1_idle) { //reset the consecutive trace count L_consec_trace_count = 0; //Both GPE engines finished on time. Now check if they were //successful too. 
if( async_request_completed(&(G_fw_timing.gpe0_timing_request->request)) && async_request_completed(&(G_fw_timing.gpe1_timing_request->request)) ) { // GPEtickdur0 = duration of last tick's PORE-GPE0 duration sensor_update( AMECSENSOR_PTR(GPEtickdur0), G_fw_timing.gpe_dur[0]); // GPEtickdur1 = duration of last tick's PORE-GPE1 duration sensor_update( AMECSENSOR_PTR(GPEtickdur1), G_fw_timing.gpe_dur[1]); } else { //This case is expected on the first call of the function. //After that, this should not happen. if(!l_first_call) { //Note: FFDC for this case is gathered by each task //responsible for a GPE job. TRAC_INFO("GPE task idle but GPE task did not complete"); } l_first_call = FALSE; } // Update Time used to measure GPE duration. G_fw_timing.rtl_start_gpe = G_fw_timing.rtl_start; // Schedule the GPE Routines that will run and update the worst // case timings (via callback) after they complete. These GPE // routines are the last GPE routines added to the queue // during the RTL tick. rc = pore_flex_schedule(G_fw_timing.gpe0_timing_request); rc2 = pore_flex_schedule(G_fw_timing.gpe1_timing_request); if(rc || rc2) { /* @ * @errortype * @moduleid AMEC_UPDATE_FW_SENSORS * @reasoncode SSX_GENERIC_FAILURE * @userdata1 return code - gpe0 * @userdata2 return code - gpe1 * @userdata4 OCC_NO_EXTENDED_RC * @devdesc Failure to schedule PORE-GPE poreFlex object for FW timing * analysis. */ l_err = createErrl( AMEC_UPDATE_FW_SENSORS, //modId SSX_GENERIC_FAILURE, //reasoncode OCC_NO_EXTENDED_RC, //Extended reason code ERRL_SEV_INFORMATIONAL, //Severity NULL, //Trace Buf DEFAULT_TRACE_SIZE, //Trace Size rc, //userdata1 rc2); //userdata2 // commit error log commitErrl( &l_err ); } } else if(L_consec_trace_count < MAX_CONSEC_TRACE) { uint64_t l_dbg1; // Reset will eventually be requested due to not having power measurement // data after X ticks, but add some additional FFDC to the trace that // will tell us what GPE job is currently executing. 
if(!l_gpe0_idle) { l_dbg1 = in64(PORE_GPE0_DBG1); TRAC_ERR("GPE0 programs did not complete within one tick. DBG1[0x%08x%08x]", l_dbg1 >> 32, l_dbg1 & 0x00000000ffffffffull); }