errlHndl_t checkForIplAttentions() { errlHndl_t err = NULL; assert(!Singleton<Service>::instance().running()); TargetHandleList list; getTargetService().getAllChips(list, TYPE_PROC); TargetHandleList::iterator tit = list.begin(); while(tit != list.end()) { err = Singleton<Service>::instance().handleAttentions(*tit); if(err) { errlCommit(err, ATTN_COMP_ID); } tit = list.erase(tit); } return 0; }
void calloutSymbolData( TargetHandle_t i_mba, const CenRank & i_rank, const MemUtils::MaintSymbols & i_symData, STEP_CODE_DATA_STRUCT & io_sc, PRDpriority i_priority ) { bool dimmsBad[PORT_SLCT_PER_MBA] = { false, false }; for ( MemUtils::MaintSymbols::const_iterator it = i_symData.begin(); it != i_symData.end(); it++ ) { dimmsBad[it->symbol.getPortSlct()] = true; } for ( uint32_t port = 0; port < PORT_SLCT_PER_MBA; port++ ) { if ( dimmsBad[port] ) { TargetHandleList list = getConnectedDimms( i_mba, i_rank, port ); for ( TargetHandleList::iterator it = list.begin(); it != list.end(); it++ ) { io_sc.service_data->SetCallout( *it, i_priority ); } } } }
registerOcc() { runtimeInterfaces_t * rt_intf = getRuntimeInterfaces(); rt_intf->get_lid_list = &UtilLidMgr::getLidList; rt_intf->occ_load = &executeLoadOCC; rt_intf->occ_start = &executeStartOCCs; rt_intf->occ_stop = &executeStopOCCs; rt_intf->process_occ_error = &process_occ_error; rt_intf->process_occ_reset = &process_occ_reset; rt_intf->enable_occ_actuation = &enable_occ_actuation; // If we already loaded OCC during the IPL we need to fix up // the virtual address because we're now not using virtual // memory // Note: We called our memory "ibm,slw-occ-image" but OPAL // created their own range that subsumed ours //@todo-RTC:124392-solve this naming issue... uint64_t l_base_homer = g_hostInterfaces->get_reserved_mem("ibm,homer-image"); TargetHandleList procChips; getAllChips(procChips, TYPE_PROC, true); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { uint64_t l_offset = (*itr)->getAttr<ATTR_POSITION>() * VMM_HOMER_INSTANCE_SIZE; (*itr)->setAttr<ATTR_HOMER_VIRT_ADDR> (l_base_homer+l_offset); } }
TargetHandleList getConnectedDimms( TargetHandle_t i_mba, uint8_t i_port ) { #define PRDF_FUNC "[CalloutUtil::getConnectedDimms] " TargetHandleList o_list; TargetHandleList dimmList = getConnectedDimms( i_mba ); for ( TargetHandleList::iterator dimmIt = dimmList.begin(); dimmIt != dimmList.end(); dimmIt++) { uint8_t portSlct; int32_t l_rc = getMbaPort( *dimmIt, portSlct ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC "getMbaPort(0x%08x) failed", getHuid(*dimmIt) ); continue; } if ( portSlct == i_port ) { o_list.push_back( *dimmIt ); } } return o_list; #undef PRDF_FUNC }
void FakeMemTargetService::getMcsList( TargetHandle_t i_proc, TargetHandleList & o_list) { TargetHandleList::iterator mcsBegin = iv_mcses.begin() + getProcFromTarget(i_proc) * cv_membufsPerProc; o_list.insert(o_list.end(), mcsBegin, mcsBegin + cv_membufsPerProc); }
TargetHandle_t TargetServiceImpl::getMcs( TargetHandle_t i_proc, uint64_t i_pos) { PredicateCTM classTypeMatch(CLASS_UNIT, TYPE_MCS); PredicateIsFunctional functionalMatch; PredicatePostfixExpr pred; class ChipUnitMatch : public PredicateBase { uint8_t iv_pos; public: bool operator()(const Target * i_target) const { uint8_t pos; bool match = false; if(i_target->tryGetAttr<ATTR_CHIP_UNIT>(pos)) { match = iv_pos == pos; } return match; } explicit ChipUnitMatch(uint8_t i_pos) : iv_pos(i_pos) {} } chipUnitMatch(i_pos); pred.push(&classTypeMatch).push( &functionalMatch).And(); pred.push(&chipUnitMatch).And(); TargetHandleList list; TargetHandle_t mcs = NULL; targetService().getAssociated( list, i_proc, TARGETING::TargetService::CHILD_BY_AFFINITY, TARGETING::TargetService::ALL, &pred); if(list.size() == 1) { mcs = list[0]; } return mcs; }
/** * @brief Optional plugin function called after analysis is complete but * before PRD exits. * @param i_cenChip A Centaur MBA chip. * @param i_sc The step code data struct. * @note This is especially useful for any analysis that still needs to be * done after the framework clears the FIR bits that were at attention. * @return SUCCESS. */ int32_t PllPostAnalysis( ExtensibleChip * i_cenChip, STEP_CODE_DATA_STRUCT & i_sc ) { #define PRDF_FUNC "[Membuf::PllPostAnalysis] " int32_t o_rc = SUCCESS; TargetHandle_t cenTrgt = i_cenChip->GetChipHandle(); do { // need to clear associated bits in the MCIFIR bits. o_rc = MemUtils::mcifirCleanup( i_cenChip, i_sc ); if( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"mcifirCleanup() failed"); break; } // Check to make sure we are at threshold and have something garded. if ( !i_sc.service_data->IsAtThreshold() || (GardAction::NoGard == i_sc.service_data->QueryGard()) ) { break; // nothing to do } TargetHandleList list = getConnected( cenTrgt, TYPE_MBA ); if ( 0 == list.size() ) { PRDF_ERR( PRDF_FUNC"getConnected(0x%08x, TYPE_MBA) failed", getHuid(cenTrgt) ); o_rc = FAIL; break; } // Send SKIP_MBA message for each MBA. for ( TargetHandleList::iterator mbaIt = list.begin(); mbaIt != list.end(); ++mbaIt ) { int32_t l_rc = mdiaSendEventMsg( *mbaIt, MDIA::SKIP_MBA ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC"mdiaSendEventMsg(0x%08x, SKIP_MBA) failed", getHuid(*mbaIt) ); o_rc |= FAIL; continue; // keep going } } } while(0); return o_rc; #undef PRDF_FUNC }
TargetHandle_t TargetServiceImpl::getMembuf( TargetHandle_t i_mcs) { TargetHandle_t membuf = NULL; TargetHandleList list; getChildAffinityTargets(list, i_mcs, CLASS_CHIP, TYPE_MEMBUF); if(list.size() == 1) { membuf = list[0]; } return membuf; }
void Service::processIntrQMsgPreAck(const msg_t & i_msg) { // this function should do as little as possible // since the hw can't generate additional interrupts // until the msg is acknowledged TargetHandle_t proc = NULL; // --------------------------- // P8 used the XISR structure // P9 uses the PIR structure // --------------------------- // PIR structure is // 17 bits unused, 4 bits group, 3 bits chipId, 1 unused, // 5/4 CoreID(norm/fused), 2/3 ThreadId(norm/fused) PIR_t l_pir; l_pir.word = i_msg.data[1]; TargetHandleList procs; getTargetService().getAllChips(procs, TYPE_PROC); TargetHandleList::iterator it = procs.begin(); // resolve the PIR to a proc target while(it != procs.end()) { uint64_t group = 0, chip = 0; getTargetService().getAttribute(ATTR_FABRIC_GROUP_ID, *it, group); getTargetService().getAttribute(ATTR_FABRIC_CHIP_ID, *it, chip); // for debug, but you have to compile it in ATTN_TRACE("IntrQMsgPreAck: Group:%X Chip:%X PirG:%X PirC:%X PirW:%X", group, chip, l_pir.groupId, l_pir.chipId, l_pir.word ); if ((group == l_pir.groupId) && (chip == l_pir.chipId)) { proc = *it; break; } ++it; } ServiceCommon::processAttnPreAck(proc); }
errlHndl_t Service::processCheckstop() { errlHndl_t err = NULL; AttentionList attentions; assert(!Singleton<Service>::instance().running()); TargetHandleList list; ProcOps & procOps = getProcOps(); attentions.clear(); getTargetService().getAllChips(list, TYPE_PROC); TargetHandleList::iterator tit = list.begin(); while(tit != list.end()) { // query the proc resolver for active attentions // (we also handle mem bufs in this routine) err = procOps.resolve( *tit, 0, attentions); if(err) { ATTN_ERR("procOps.resolve() returned error.HUID:0X%08X ", get_huid( *tit )); break; } ++tit; } if ( NULL == err ) { if(!attentions.empty()) { err = getPrdWrapper().callPrd(attentions); } if(err) { ATTN_ERR("callPrd() returned error." ) } } return err; }
/** * @brief Plugin to add MBA and Dimms behind given port to callout list. * @param i_chip mba chip * @param i_sc The step code data struct. * @param i_port Port Number. * @return SUCCESS */ int32_t CalloutMbaAndDimm( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_port ) { using namespace TARGETING; using namespace CalloutUtil; int32_t o_rc = SUCCESS; TargetHandle_t mbaTarget = i_chip->GetChipHandle(); TargetHandleList calloutList = getConnectedDimms( mbaTarget, i_port ); i_sc.service_data->SetCallout( mbaTarget, MRU_LOW ); for ( TargetHandleList::iterator it = calloutList.begin(); it != calloutList.end(); it++) { i_sc.service_data->SetCallout( *it,MRU_HIGH ); } return o_rc; }
TargetHandle_t TargetServiceImpl::getProc( TargetHandle_t i_membuf) { TargetHandle_t proc = NULL; TargetHandleList list; PredicateCTM pred(CLASS_CHIP, TYPE_PROC); targetService().getAssociated( list, i_membuf, TARGETING::TargetService::PARENT_BY_AFFINITY, TARGETING::TargetService::ALL, &pred); if(list.size() == 1) { proc = list[0]; } return proc; }
TargetHandle_t TargetServiceImpl::getMcs( TargetHandle_t i_membuf) { TargetHandle_t mcs = NULL; TargetHandleList list; PredicateCTM pred(CLASS_UNIT, TYPE_MCS); targetService().getAssociated( list, i_membuf, TARGETING::TargetService::PARENT_BY_AFFINITY, TARGETING::TargetService::IMMEDIATE, &pred); if(list.size() == 1) { mcs = list[0]; } return mcs; }
TargetHandleList getConnectedDimms( TargetHandle_t i_mba, const CenRank & i_rank ) { #define PRDF_FUNC "[CalloutUtil::getConnectedDimms] " TargetHandleList o_list; if ( TYPE_MBA != getTargetType(i_mba) ) { PRDF_ERR( PRDF_FUNC "Invalid target type: HUID=0x%08x", getHuid(i_mba) ); } else { TargetHandleList dimmList = getConnected( i_mba, TYPE_DIMM ); for ( TargetHandleList::iterator dimmIt = dimmList.begin(); dimmIt != dimmList.end(); dimmIt++) { uint8_t dimmSlct; int32_t l_rc = getMbaDimm( *dimmIt, dimmSlct ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC "getMbaDimm(0x%08x) failed", getHuid(*dimmIt) ); continue; } if ( dimmSlct == i_rank.getDimmSlct() ) { o_list.push_back( *dimmIt ); } } } return o_list; #undef PRDF_FUNC }
errlHndl_t ServiceCommon::configureInterrupts( ConfigureMode i_mode) { errlHndl_t err = NULL; TargetHandleList procs; getTargetService().getAllChips(procs, TYPE_PROC); TargetHandleList::iterator it = procs.begin(); while(it != procs.end()) { uint64_t mask = 0; // clear GPIO interrupt type status register if(i_mode == UP) { err = putScom(*it, INTR_TYPE_LCL_ERR_STATUS_AND_REG, 0); } if(err) { break; } // unmask GPIO interrupt type mask = 0x8000000000000000ull; err = putScom(*it, (i_mode == UP ? INTR_TYPE_MASK_AND_REG : INTR_TYPE_MASK_OR_REG), i_mode == UP ? ~mask : mask); if(err) { break; } // set GPIO interrupt type mode - or if(i_mode == UP) { err = putScom(*it, INTR_TYPE_CONFIG_AND_REG, ~mask); } if(err) { break; } // enable/disable MCSes mask = 0; GP1::forEach(~0, &mask, &getPbGp2Mask); err = modifyScom(*it, GP2_REG, i_mode == UP ? mask : ~mask, i_mode == UP ? SCOM_OR : SCOM_AND); if(err) { break; } // enable attentions in ipoll mask mask = HostMask::nonHost(); mask |= HostMask::host(); // this doesn't have an and/or reg for some reason... err = modifyScom(*it, IPOLL::address, i_mode == UP ? ~mask : mask, i_mode == UP ? SCOM_AND : SCOM_OR); if(err) { break; } ++it; } return err; }
errlHndl_t startScrub() { #define PRDF_FUNC "[PRDF::startScrub] " PRDF_ENTER( PRDF_FUNC ); errlHndl_t o_errl = NULL; int32_t l_rc = SUCCESS; HUID nodeId = INVALID_HUID; // will unlock when going out of scope PRDF_SYSTEM_SCOPELOCK; do { // Since the last refresh is in istep10 host_prd_hwreconfig, // it may be good to call it again here at istep16 mss_scrub // to remove any non-functional MBAs from PRD system model. o_errl = noLock_refresh(); // This shouldn't return any error but if it does, break out if(NULL != o_errl) { PRDF_ERR( PRDF_FUNC"noLock_refresh() failed" ); break; } // This is run in Hostboot so there should only be one node. TargetHandleList list = getFunctionalTargetList( TYPE_NODE ); if ( 1 != list.size() ) { PRDF_ERR( PRDF_FUNC"getFunctionalTargetList(TYPE_NODE) failed" ); l_rc = FAIL; break; } nodeId = getHuid(list[0]); PRDF_ENTER( PRDF_FUNC"HUID=0x%08x", nodeId ); // Start scrubbing on all MBAs. MbaDomain * domain = (MbaDomain *)systemPtr->GetDomain(MBA_DOMAIN); if ( NULL == domain ) { PRDF_ERR( PRDF_FUNC"MBA_DOMAIN not found. nodeId=0x%08x", nodeId ); l_rc = FAIL; break; } l_rc = domain->startScrub(); PRDF_EXIT( PRDF_FUNC"HUID=0x%08x", nodeId ); } while (0); if (( SUCCESS != l_rc ) && (NULL == o_errl)) { // Get user data uint64_t ud12 = PRDF_GET_UINT64_FROM_UINT32( nodeId, __LINE__ ); uint64_t ud34 = PRDF_GET_UINT64_FROM_UINT32( 0, 0 ); // Create error log o_errl = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, // severity PRDF_START_SCRUB, // module ID PRDF_DETECTED_FAIL_SOFTWARE, // reason code ud12, ud34 ); // user data 1-4 // Add 2nd level support o_errl->addProcedureCallout( EPUB_PRC_LVL_SUPP, SRCI_PRIORITY_HIGH ); // Add traces o_errl->collectTrace( PRDF_COMP_NAME, 512 ); } PRDF_EXIT( PRDF_FUNC ); return o_errl; #undef PRDF_FUNC }
errlHndl_t PegasusConfigurator::addDomainChips( TARGETING::TYPE i_type, RuleChipDomain * io_domain, PllDomainList * io_pllDomains, PllDomainList * io_pllDomains2) { using namespace TARGETING; errlHndl_t l_errl = NULL ; // Get references to factory objects. ScanFacility & scanFac = ScanFacility::Access(); ResolutionFactory & resFac = ResolutionFactory::Access(); // Get all targets of specified type and add to given domain. TargetHandleList list = PlatServices::getFunctionalTargetList( i_type ); if ( 0 == list.size() ) { PRDF_ERR( "[addDomainChips] getFunctionalTargetList " "returned empty list for i_type=%d", i_type ); } else { // Get rule filename based on type. const char * fileName = ""; switch ( i_type ) { case TYPE_PROC: // Check which Proc chip type if (MODEL_NAPLES == getProcModel(list[0])) fileName = NaplesProc; else fileName = MuranoVeniceProc; break; case TYPE_EX: fileName = Ex; break; case TYPE_MCS: fileName = Mcs; break; case TYPE_MEMBUF: fileName = Membuf; break; case TYPE_MBA: fileName = Mba; break; default: // Print a trace statement, but do not fail the build. PRDF_ERR( "[addDomainChips] Unsupported target type: %d", i_type ); } for ( TargetHandleList::const_iterator itr = list.begin(); itr != list.end(); ++itr ) { if ( NULL == *itr ) continue; RuleChip * chip = new RuleChip( fileName, *itr, scanFac, resFac,l_errl ); if( NULL != l_errl ) { delete chip; break; } sysChipLst.push_back( chip ); io_domain->AddChip( chip ); // PLL domains switch ( i_type ) { case TYPE_PROC: addChipsToPllDomain(CLOCK_DOMAIN_FAB, io_pllDomains, chip, *itr, scanFac, resFac); addChipsToPllDomain(CLOCK_DOMAIN_IO, io_pllDomains2, chip, *itr, scanFac, resFac); break; case TYPE_MEMBUF: addChipsToPllDomain(CLOCK_DOMAIN_MEMBUF, io_pllDomains, chip, *itr, scanFac, resFac); break; default: break; } } // Flush rule table cache since objects are all built. Prdr::LoadChipCache::flushCache(); } return l_errl; }
/** @func GetCheckstopInfo * To be called from the fabric domain to gather Checkstop information. This * information is used in a sorting algorithm. * * This is a plugin function: GetCheckstopInfo * * @param i_chip - The chip. * @param o_wasInternal - True if this chip has an internal checkstop. * @param o_externalChips - List of external fabrics driving checkstop. * @param o_wofValue - Current WOF value (unused for now). */ int32_t GetCheckstopInfo( ExtensibleChip * i_chip, bool & o_wasInternal, TargetHandleList & o_externalChips, uint64_t & o_wofValue ) { // Clear parameters. o_wasInternal = false; o_externalChips.erase(o_externalChips.begin(), o_externalChips.end()); o_wofValue = 0; SCAN_COMM_REGISTER_CLASS * l_globalFir = i_chip->getRegister("GLOBAL_CS_FIR"); SCAN_COMM_REGISTER_CLASS * l_pbXstpFir = i_chip->getRegister("PB_CHIPLET_CS_FIR"); SCAN_COMM_REGISTER_CLASS * l_extXstpFir = i_chip->getRegister("PBEXTFIR"); int32_t o_rc = SUCCESS; o_rc |= l_globalFir->Read(); o_rc |= l_pbXstpFir->Read(); o_rc |= l_extXstpFir->Read(); if(o_rc) { PRDF_ERR( "[GetCheckstopInfo] SCOM fail on 0x%08x rc=%x", i_chip->GetId(), o_rc); return o_rc; } if ((0 != l_globalFir->GetBitFieldJustified(0,32)) && (!l_globalFir->IsBitSet(2) || !l_pbXstpFir->IsBitSet(2))) o_wasInternal = true; // Get connected chips. uint32_t l_connectedXstps = l_extXstpFir->GetBitFieldJustified(0,7); uint32_t l_positions[] = { 0, // bit 0 - XBUS 0 1, // bit 1 - XBUS 1 2, // bit 2 - XBUS 2 3, // bit 3 - XBUS 3 0, // bit 4 - ABUS 0 1, // bit 5 - ABUS 1 2 // bit 6 - ABUS 2 }; for (int i = 0, j = 0x40; i < 7; i++, j >>= 1) { if (0 != (j & l_connectedXstps)) { TargetHandle_t l_connectedFab = getConnectedPeerProc(i_chip->GetChipHandle(), i<4 ? TYPE_XBUS : TYPE_ABUS, l_positions[i]); if (NULL != l_connectedFab) { o_externalChips.push_back(l_connectedFab); } } } // Read WOF value. SCAN_COMM_REGISTER_CLASS * l_wof = i_chip->getRegister("TODWOF"); o_rc |= l_wof->Read(); if(o_rc) { PRDF_ERR( "[GetCheckstopInfo] SCOM fail on 0x%08x rc=%x", i_chip->GetId(), o_rc); return o_rc; } o_wofValue = l_wof->GetBitFieldJustified(0,64); return SUCCESS; } PRDF_PLUGIN_DEFINE( Proc, GetCheckstopInfo );
int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[CenMbaTdCtlr::handleUE] " using namespace CalloutUtil; int32_t o_rc = SUCCESS; iv_tdState = NO_OP; // Abort the TD procedure. setTdSignature( io_sc, PRDFSIG_MaintUE ); io_sc.service_data->SetServiceCall(); CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); do { // Clean up the maintenance command. This is needed just in case the UE // isolation procedure is modified to use maintenance commands. o_rc = cleanupPrevCmd(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" ); break; } // Look for all failing bits on this rank. CenDqBitmap bitmap; o_rc = mssIplUeIsolation( iv_mbaTrgt, iv_rank, bitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"mssIplUeIsolation() failed" ); break; } // Add UE data to capture data. bitmap.getCaptureData( io_sc.service_data->GetCaptureData() ); // Callout the failing DIMMs. TargetHandleList callouts; for ( int32_t ps = 0; ps < PORT_SLCT_PER_MBA; ps++ ) { bool badDqs = false; o_rc = bitmap.badDqs( ps, badDqs ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"badDqs(%d) failed", ps ); break; } if ( !badDqs ) continue; // nothing to do. TargetHandleList dimms = getConnectedDimms(iv_mbaTrgt, iv_rank, ps); if ( 0 == dimms.size() ) { PRDF_ERR( PRDF_FUNC"getConnectedDimms(%d) failed", ps ); o_rc = FAIL; break; } callouts.insert( callouts.end(), dimms.begin(), dimms.end() ); if ( isMfgCeCheckingEnabled() ) { // As we are doing callout for UE, we dont need to do callout // during CE for this rank on given port mbadb->getIplCeStats()->banAnalysis( iv_rank, ps ); } } if ( SUCCESS != o_rc ) break; if ( 0 == callouts.size() ) { // It is possible the scrub counters have rolled over to zero due to // a known DD1.0 hardware bug. In this case, the best we can do is // callout both DIMMs, because at minimum we know there was a UE, we // just don't know where. // NOTE: If this condition happens because of a DD2.0+ bug, the // mssIplUeIsolation procedure will callout the Centaur. callouts = getConnectedDimms( iv_mbaTrgt, iv_rank ); if ( 0 == callouts.size() ) { PRDF_ERR( PRDF_FUNC"getConnectedDimms() failed" ); o_rc = FAIL; break; } if ( isMfgCeCheckingEnabled() ) { // As we are doing callout for UE, we dont need to do callout // during CE for this rank on both port mbadb->getIplCeStats()->banAnalysis( iv_rank); } } // Callout all DIMMs in the list. for ( TargetHandleList::iterator i = callouts.begin(); i != callouts.end(); i++ ) { io_sc.service_data->SetCallout( *i, MRU_HIGH ); } } while(0); return o_rc; #undef PRDF_FUNC }
errlHndl_t discoverTargets() { HWAS_INF("discoverTargets entry"); errlHndl_t errl = NULL; // loop through all the targets and set HWAS_STATE to a known default for (TargetIterator target = targetService().begin(); target != targetService().end(); ++target) { HwasState hwasState = target->getAttr<ATTR_HWAS_STATE>(); hwasState.deconfiguredByEid = 0; hwasState.poweredOn = false; hwasState.present = false; hwasState.functional = false; hwasState.dumpfunctional = false; target->setAttr<ATTR_HWAS_STATE>(hwasState); } // Assumptions and actions: // CLASS_SYS (exactly 1) - mark as present // CLASS_ENC, TYPE_PROC, TYPE_MEMBUF, TYPE_DIMM // (ALL require hardware query) - call platPresenceDetect // \->children: CLASS_* (NONE require hardware query) - mark as present do { // find CLASS_SYS (the top level target) Target* pSys; targetService().getTopLevelTarget(pSys); HWAS_ASSERT(pSys, "HWAS discoverTargets: no CLASS_SYS TopLevelTarget found"); // mark this as present enableHwasState(pSys, true, true, 0); HWAS_DBG("pSys %.8X - marked present", pSys->getAttr<ATTR_HUID>()); // find list of all we need to call platPresenceDetect against PredicateCTM predEnc(CLASS_ENC); PredicateCTM predChip(CLASS_CHIP); PredicateCTM predDimm(CLASS_LOGICAL_CARD, TYPE_DIMM); PredicatePostfixExpr checkExpr; checkExpr.push(&predChip).push(&predDimm).Or().push(&predEnc).Or(); TargetHandleList pCheckPres; targetService().getAssociated( pCheckPres, pSys, TargetService::CHILD, TargetService::ALL, &checkExpr ); // pass this list to the hwas platform-specific api where // pCheckPres will be modified to only have present targets HWAS_DBG("pCheckPres size: %d", pCheckPres.size()); errl = platPresenceDetect(pCheckPres); HWAS_DBG("pCheckPres size: %d", pCheckPres.size()); if (errl != NULL) { break; // break out of the do/while so that we can return } // for each, read their ID/EC level. if that works, // mark them and their descendants as present // read the partialGood vector to determine if any are not functional // and read and store values from the PR keyword // list of procs and data that we'll need to look at the PR keyword procRestrict_t l_procEntry; std::vector <procRestrict_t> l_procPRList; // sort the list by ATTR_HUID to ensure that we // start at the same place each time std::sort(pCheckPres.begin(), pCheckPres.end(), compareTargetHuid); for (TargetHandleList::const_iterator pTarget_it = pCheckPres.begin(); pTarget_it != pCheckPres.end(); ++pTarget_it ) { TargetHandle_t pTarget = *pTarget_it; // if CLASS_ENC is still in this list, mark as present if (pTarget->getAttr<ATTR_CLASS>() == CLASS_ENC) { enableHwasState(pTarget, true, true, 0); HWAS_DBG("pTarget %.8X - CLASS_ENC marked present", pTarget->getAttr<ATTR_HUID>()); // on to the next target continue; } bool chipPresent = true; bool chipFunctional = true; uint32_t errlEid = 0; uint16_t pgData[VPD_CP00_PG_DATA_LENGTH / sizeof(uint16_t)]; bzero(pgData, sizeof(pgData)); if (pTarget->getAttr<ATTR_CLASS>() == CLASS_CHIP) { // read Chip ID/EC data from these physical chips errl = platReadIDEC(pTarget); if (errl) { // read of ID/EC failed even tho we THOUGHT we were present. HWAS_INF("pTarget %.8X - read IDEC failed (eid 0x%X) - bad", errl->eid(), pTarget->getAttr<ATTR_HUID>()); // chip NOT present and NOT functional, so that FSP doesn't // include this for HB to process chipPresent = false; chipFunctional = false; errlEid = errl->eid(); // commit the error but keep going errlCommit(errl, HWAS_COMP_ID); // errl is now NULL } else if (pTarget->getAttr<ATTR_TYPE>() == TYPE_PROC) { // read partialGood vector from these as well. errl = platReadPartialGood(pTarget, pgData); if (errl) { // read of PG failed even tho we were present.. HWAS_INF("pTarget %.8X - read PG failed (eid 0x%X)- bad", errl->eid(), pTarget->getAttr<ATTR_HUID>()); chipFunctional = false; errlEid = errl->eid(); // commit the error but keep going errlCommit(errl, HWAS_COMP_ID); // errl is now NULL } else // look at the 'nest' logic to override the functionality // of this proc if (pgData[VPD_CP00_PG_PIB_INDEX] != VPD_CP00_PG_PIB_GOOD) { HWAS_INF("pTarget %.8X - PIB pgPdata[%d]: expected 0x%04X - bad", pTarget->getAttr<ATTR_HUID>(), VPD_CP00_PG_PIB_INDEX, VPD_CP00_PG_PIB_GOOD); chipFunctional = false; } else if (pgData[VPD_CP00_PG_PERVASIVE_INDEX] != VPD_CP00_PG_PERVASIVE_GOOD) { HWAS_INF("pTarget %.8X - Pervasive pgPdata[%d]: expected 0x%04X - bad", pTarget->getAttr<ATTR_HUID>(), VPD_CP00_PG_PERVASIVE_INDEX, VPD_CP00_PG_PERVASIVE_GOOD); chipFunctional = false; } else if ((pgData[VPD_CP00_PG_POWERBUS_INDEX] & VPD_CP00_PG_POWERBUS_BASE) != VPD_CP00_PG_POWERBUS_BASE) { HWAS_INF("pTarget %.8X - PowerBus pgPdata[%d]: expected 0x%04X - bad", pTarget->getAttr<ATTR_HUID>(), VPD_CP00_PG_POWERBUS_INDEX, VPD_CP00_PG_POWERBUS_BASE); chipFunctional = false; } else { // read the PR keywords that we need, so that if // we have errors, we can handle them as approprite. uint8_t prData[VPD_VINI_PR_DATA_LENGTH/sizeof(uint8_t)]; bzero(prData, sizeof(prData)); errl = platReadPR(pTarget, prData); if (errl != NULL) { // read of PR keyword failed HWAS_INF("pTarget %.8X - read PR failed - bad", pTarget->getAttr<ATTR_HUID>()); chipFunctional = false; errlEid = errl->eid(); // commit the error but keep going errlCommit(errl, HWAS_COMP_ID); // errl is now NULL } else { // save info so that we can // process the PR keyword after this loop HWAS_INF("pTarget %.8X - pushing to procPRlist; FRU_ID %d", pTarget->getAttr<ATTR_HUID>(), pTarget->getAttr<ATTR_FRU_ID>()); l_procEntry.target = pTarget; l_procEntry.group = pTarget->getAttr<ATTR_FRU_ID>(); l_procEntry.procs = (prData[7] & VPD_VINI_PR_B7_MASK) + 1; l_procEntry.maxEXs = l_procEntry.procs * (prData[2] & VPD_VINI_PR_B2_MASK) >> VPD_VINI_PR_B2_SHIFT; l_procPRList.push_back(l_procEntry); if (l_procEntry.maxEXs == 0) { // this is PROBABLY bad PR, so YELL... HWAS_ERR("pTarget %.8X - PR VPD says 0 CORES", pTarget->getAttr<ATTR_HUID>()); } } } } // TYPE_PROC } // CLASS_CHIP HWAS_DBG("pTarget %.8X - detected present, %sfunctional", pTarget->getAttr<ATTR_HUID>(), chipFunctional ? "" : "NOT "); // now need to mark all of this target's // physical descendants as present and functional as appropriate TargetHandleList pDescList; targetService().getAssociated( pDescList, pTarget, TargetService::CHILD, TargetService::ALL); for (TargetHandleList::const_iterator pDesc_it = pDescList.begin(); pDesc_it != pDescList.end(); ++pDesc_it) { TargetHandle_t pDesc = *pDesc_it; // by default, the descendant's functionality is 'inherited' bool descFunctional = chipFunctional; if (chipFunctional) { // if the chip is functional, the look through the // partialGood vector to see if its chiplets // are functional if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_XBUS) && (pgData[VPD_CP00_PG_XBUS_INDEX] != VPD_CP00_PG_XBUS_GOOD)) { HWAS_INF("pDesc %.8X - XBUS pgPdata[%d]: expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), VPD_CP00_PG_XBUS_INDEX, VPD_CP00_PG_XBUS_GOOD); descFunctional = false; } else if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_ABUS) && (pgData[VPD_CP00_PG_ABUS_INDEX] != VPD_CP00_PG_ABUS_GOOD)) { HWAS_INF("pDesc %.8X - ABUS pgPdata[%d]: expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), VPD_CP00_PG_ABUS_INDEX, VPD_CP00_PG_ABUS_GOOD); descFunctional = false; } else if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_PCI) && (pgData[VPD_CP00_PG_PCIE_INDEX] != VPD_CP00_PG_PCIE_GOOD)) { HWAS_INF("pDesc %.8X - PCIe pgPdata[%d]: expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), VPD_CP00_PG_PCIE_INDEX, VPD_CP00_PG_PCIE_GOOD); descFunctional = false; } else if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_EX) || (pDesc->getAttr<ATTR_TYPE>() == TYPE_CORE) ) { ATTR_CHIP_UNIT_type indexEX = pDesc->getAttr<ATTR_CHIP_UNIT>(); if (pgData[VPD_CP00_PG_EX0_INDEX + indexEX] != VPD_CP00_PG_EX0_GOOD) { HWAS_INF("pDesc %.8X - CORE/EX%d pgPdata[%d]: expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), indexEX, VPD_CP00_PG_EX0_INDEX + indexEX, VPD_CP00_PG_EX0_GOOD); descFunctional = false; } } else if (pDesc->getAttr<ATTR_TYPE>() == TYPE_MCS) { ATTR_CHIP_UNIT_type indexMCS = pDesc->getAttr<ATTR_CHIP_UNIT>(); // check: MCS 0..3 in MCL, MCS 4..7 in MCR if (((indexMCS >=0) && (indexMCS <=3)) && ((pgData[VPD_CP00_PG_POWERBUS_INDEX] & VPD_CP00_PG_POWERBUS_MCL) == 0)) { HWAS_INF("pDesc %.8X - MCS%d pgPdata[%d]: MCL expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), indexMCS, VPD_CP00_PG_POWERBUS_INDEX, VPD_CP00_PG_POWERBUS_MCL); descFunctional = false; } else if (((indexMCS >=4) && (indexMCS <=7)) && ((pgData[VPD_CP00_PG_POWERBUS_INDEX] & VPD_CP00_PG_POWERBUS_MCR) == 0)) { HWAS_INF("pDesc %.8X - MCS%d pgPdata[%d]: MCR expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), indexMCS, VPD_CP00_PG_POWERBUS_INDEX, VPD_CP00_PG_POWERBUS_MCR); descFunctional = false; } } } // chipFunctional // for sub-parts, if it's not functional, it's not present. enableHwasState(pDesc, descFunctional, descFunctional, errlEid); HWAS_DBG("pDesc %.8X - marked %spresent, %sfunctional", pDesc->getAttr<ATTR_HUID>(), descFunctional ? "" : "NOT ", descFunctional ? "" : "NOT "); } // set HWAS state to show CHIP is present, functional per above enableHwasState(pTarget, chipPresent, chipFunctional, errlEid); } // for pTarget_it
void Service::processIntrQMsgPreAck(const msg_t & i_msg) { // this function should do as little as possible // since the hw can't generate additional interrupts // until the msg is acknowledged TargetHandle_t proc = NULL; INTR::XISR_t xisr; xisr.u32 = i_msg.data[0]; TargetHandleList procs; getTargetService().getAllChips(procs, TYPE_PROC); TargetHandleList::iterator it = procs.begin(); // resolve the xisr to a proc target while(it != procs.end()) { uint64_t node = 0, chip = 0; getTargetService().getAttribute(ATTR_FABRIC_NODE_ID, *it, node); getTargetService().getAttribute(ATTR_FABRIC_CHIP_ID, *it, chip); if(node == xisr.node && chip == xisr.chip) { proc = *it; break; } ++it; } uint64_t hostMask = HostMask::host(); uint64_t nonHostMask = HostMask::nonHost(); uint64_t data = 0; // do the minimum that is required // for sending EOI without getting // another interrupt. for host attentions // this is clearing the gpio interrupt // type status register // and for xstp,rec,spcl this is // masking the appropriate bit in // ipoll mask // read the ipoll status register // to determine the interrupt was // caused by host attn or something // else (xstp,rec,spcl) errlHndl_t err = getScom(proc, IPOLL_STATUS_REG, data); if(err) { errlCommit(err, ATTN_COMP_ID); // assume everything is on data = hostMask | nonHostMask; } if(data & hostMask) { // if host attention, clear the ITR macro gpio interrupt // type status register. err = putScom(proc, INTR_TYPE_LCL_ERR_STATUS_AND_REG, 0); if(err) { errlCommit(err, ATTN_COMP_ID); } } if(data & nonHostMask) { // mask local proc xstp,rec and/or special attns if on. // the other thread might be trying to unmask // on the same target. The mutex ensures // neither thread corrupts the register. mutex_lock(&iv_mutex); err = modifyScom(proc, IPOLL::address, data & nonHostMask, SCOM_OR); mutex_unlock(&iv_mutex); if(err) { errlCommit(err, ATTN_COMP_ID); } } }
errlHndl_t Service::configureInterrupts( msg_q_t i_q, ConfigureMode i_mode) { errlHndl_t err = NULL; // First register for Q // This will set up the psi host bridge logic for // lcl_err interrupt on all chips if(i_mode == UP) { err = INTR::registerMsgQ(i_q, ATTENTION, INTR::ISN_LCL_ERR); } // setup the ITR macro for GPIO type host attentions, // on all procs if(!err) { TargetHandleList procs; getTargetService().getAllChips(procs, TYPE_PROC); TargetHandleList::iterator it = procs.begin(); while(it != procs.end()) { uint64_t mask = 0; // clear GPIO interrupt type status register if(i_mode == UP) { err = putScom(*it, INTR_TYPE_LCL_ERR_STATUS_AND_REG, 0); } if(err) { break; } // unmask GPIO interrupt type mask = 0x8000000000000000ull; err = putScom( *it, (i_mode == UP ? INTR_TYPE_MASK_AND_REG : INTR_TYPE_MASK_OR_REG), i_mode == UP ? ~mask : mask); if(err) { break; } // set GPIO interrupt type mode - or if(i_mode == UP) { err = putScom(*it, INTR_TYPE_CONFIG_AND_REG, ~mask); } if(err) { break; } // enable/disable MCSes mask = 0; GP1::forEach(~0, &mask, &getPbGp2Mask); err = modifyScom( *it, GP2_REG, i_mode == UP ? mask : ~mask, i_mode == UP ? SCOM_OR : SCOM_AND); if(err) { break; } // enable attentions in ipoll mask mask = HostMask::nonHost(); mask |= HostMask::host(); // this doesn't have an and/or reg for some reason... err = modifyScom( *it, IPOLL::address, i_mode == UP ? ~mask : mask, i_mode == UP ? SCOM_AND : SCOM_OR); if(err) { break; } ++it; } if(!err && i_mode == DOWN) { if(NULL == INTR::unRegisterMsgQ(INTR::ISN_LCL_ERR)) { ATTN_ERR("INTR did not find isn: 0x%07x", INTR::ISN_LCL_ERR); } } } return err; }
errlHndl_t getHwConfig( HOMER_Data_t & o_data, const HwInitialized_t i_curHw ) { #define FUNC "[PRDF::getHwConfig] " errlHndl_t errl = NULL; do { //---------------------------------------------------------------------- // Get hardware config information. //---------------------------------------------------------------------- // Get the master PROC position. TargetHandle_t masterProc = getMasterProc(); if ( NULL == masterProc ) { PRDF_ERR( FUNC "master PROC is NULL" ); /*@ * @errortype * @reasoncode PRDF_NULL_VALUE_RETURNED * @severity ERRL_SEV_UNRECOVERABLE * @moduleid PRDF_CS_FIRDATA_WRITE * @userdata1 0 * @userdata2 0 * @devdesc NULL system target. */ errl = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, PRDF_CS_FIRDATA_WRITE, PRDF_NULL_VALUE_RETURNED, 0, 0 ); break; } o_data.masterProc = getTargetPosition( masterProc ); // Iterate the list functional PROCs. TargetHandleList procList = getFunctionalTargetList( TYPE_PROC ); for ( TargetHandleList::iterator procIt = procList.begin(); procIt != procList.end(); ++procIt ) { uint32_t procPos = getTargetPosition(*procIt); PRDF_ASSERT( procPos < MAX_PROC_PER_NODE ); o_data.procMask |= 0x80 >> procPos; // Get the PROC FSI address. FSI::FsiLinkInfo_t fsiInfo; FSI::getFsiLinkInfo( *procIt, fsiInfo ); o_data.procFsiBaseAddr[procPos] = fsiInfo.baseAddr; // Iterate the connected EXs. TargetHandleList exList = getConnected( *procIt, TYPE_EX ); for ( TargetHandleList::iterator exIt = exList.begin(); exIt != exList.end(); ++exIt ) { uint32_t exPos = getTargetPosition( *exIt ); PRDF_ASSERT( exPos < MAX_EX_PER_PROC ); o_data.exMasks[procPos] |= 0x8000 >> exPos; } if (ALL_PROC_MEM_MASTER_CORE == i_curHw || ALL_HARDWARE == i_curHw) { // Iterate the connected MCSs. TargetHandleList mcsList = getConnected( *procIt, TYPE_MCS ); for ( TargetHandleList::iterator mcsIt = mcsList.begin(); mcsIt != mcsList.end(); ++mcsIt ) { uint32_t mcsPos = getTargetPosition( *mcsIt ); PRDF_ASSERT( mcsPos < MAX_MCS_PER_PROC ); o_data.mcsMasks[procPos] |= 0x80 >> mcsPos; } // Iterate the connected MEMBUFs. TargetHandleList membList = getConnected(*procIt, TYPE_MEMBUF ); for ( TargetHandleList::iterator membIt = membList.begin(); membIt != membList.end(); ++membIt ) { uint32_t membPos = getTargetPosition(*membIt); PRDF_ASSERT( membPos < MAX_MEMB_PER_PROC ); o_data.membMasks[procPos] |= 0x80 >> membPos; // Get the MEMBUF FSI address. getFsiLinkInfo( *membIt, fsiInfo ); o_data.membFsiBaseAddr[procPos][membPos] = fsiInfo.baseAddr; // Iterate the connected MBAs. TargetHandleList mbaList = getConnected(*membIt, TYPE_MBA ); for ( TargetHandleList::iterator mbaIt = mbaList.begin(); mbaIt != mbaList.end(); ++mbaIt ) { uint32_t mbaPos = getTargetPosition(*mbaIt); uint32_t shift = membPos * MAX_MBA_PER_MEMBUF + mbaPos; PRDF_ASSERT( shift < MAX_MBA_PER_PROC ); o_data.mbaMasks[procPos] |= 0x8000 >> shift; } } } } } while (0); return errl; #undef FUNC }
/** * @brief Enable and disable special wakeup for SCOM operations * FSP: Call the Host interface wakeup function for all core * targets under the specified target, HOST wakeup bit is used * BMC: Call the wakeup HWP for the target type specified (EQ/EX/CORE), * Proc type calls the HWP for all cores, FSP wakeup bit is used */ errlHndl_t handleSpecialWakeup(TARGETING::Target* i_target, bool i_enable) { errlHndl_t l_errl = NULL; TARGETING::TYPE l_type = i_target->getAttr<TARGETING::ATTR_TYPE>(); // FSP if(INITSERVICE::spBaseServicesEnabled()) { // Check for valid interface function if( g_hostInterfaces == NULL || g_hostInterfaces->wakeup == NULL ) { TRACFCOMP( g_trac_scom, ERR_MRK"Hypervisor wakeup interface not linked"); /*@ * @errortype * @moduleid SCOM_HANDLE_SPECIAL_WAKEUP * @reasoncode SCOM_RUNTIME_INTERFACE_ERR * @userdata1 Target HUID * @userdata2 Wakeup Enable * @devdesc Wakeup runtime interface not linked. */ l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_INFORMATIONAL, SCOM_HANDLE_SPECIAL_WAKEUP, SCOM_RUNTIME_INTERFACE_ERR, get_huid(i_target), i_enable); l_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_HIGH); return l_errl; } TargetHandleList pCoreList; // If the target is already a core just push it on the list if(l_type == TARGETING::TYPE_CORE) { pCoreList.clear(); pCoreList.push_back(i_target); } else { getChildChiplets( pCoreList, i_target, TARGETING::TYPE_CORE ); } // Call wakeup on all core targets // Wakeup may be called twice for fused cores for ( auto pCore_it = pCoreList.begin(); pCore_it != pCoreList.end(); ++pCore_it ) { // Runtime target id RT_TARG::rtChipId_t rtTargetId = 0; l_errl = RT_TARG::getRtTarget(*pCore_it, rtTargetId); if(l_errl) { break; } uint32_t mode; if(i_enable) { mode = HBRT_WKUP_FORCE_AWAKE; } else { mode = HBRT_WKUP_CLEAR_FORCE; } // Do the special wakeup int l_rc = g_hostInterfaces->wakeup(rtTargetId,mode); if(l_rc) { TRACFCOMP( g_trac_scom,ERR_MRK "Hypervisor wakeup failed. " "rc 0x%X target_huid 0x%llX rt_target_id 0x%llX mode %d", l_rc, get_huid(*pCore_it), rtTargetId, mode ); // convert rc to error log /*@ * @errortype * @moduleid SCOM_HANDLE_SPECIAL_WAKEUP * @reasoncode SCOM_RUNTIME_WAKEUP_ERR * @userdata1 Hypervisor return code * @userdata2[0:31] Runtime Target ID * @userdata2[32:63] Wakeup Mode * @devdesc Hypervisor wakeup failed. */ l_errl = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_INFORMATIONAL, SCOM_HANDLE_SPECIAL_WAKEUP, SCOM_RUNTIME_WAKEUP_ERR, l_rc, TWO_UINT32_TO_UINT64( rtTargetId, mode)); l_errl->addHwCallout(i_target, HWAS::SRCI_PRIORITY_LOW, HWAS::NO_DECONFIG, HWAS::GARD_NULL); break; } } } // BMC else { if(l_type == TARGETING::TYPE_PROC) { // Call wakeup on all core targets TargetHandleList pCoreList; getChildChiplets( pCoreList, i_target, TARGETING::TYPE_CORE ); for ( auto pCore_it = pCoreList.begin(); pCore_it != pCoreList.end(); ++pCore_it ) { // To simplify, just call recursively with the core target l_errl = handleSpecialWakeup(*pCore_it, i_enable); if(l_errl) { break; } } return l_errl; } // Need to handle multiple calls to enable special wakeup // Count attribute will keep track and disable when zero // Assume HBRT is single-threaded, so no issues with concurrency uint32_t l_count = (i_target)->getAttr<ATTR_SPCWKUP_COUNT>(); if((l_count==0) && !i_enable) { TRACFCOMP( g_trac_scom,ERR_MRK "Disabling special wakeup on target with SPCWKUP_COUNT=0"); /*@ * @errortype * @moduleid SCOM_HANDLE_SPECIAL_WAKEUP * @reasoncode SCOM_RUNTIME_SPCWKUP_COUNT_ERR * @userdata1 Target HUID * @userdata2[0:31] Wakeup Enable * @userdata2[32:63] Wakeup Count * @devdesc Disabling special wakeup when not enabled. */ l_errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_INFORMATIONAL, SCOM_HANDLE_SPECIAL_WAKEUP, SCOM_RUNTIME_SPCWKUP_COUNT_ERR, get_huid(i_target), TWO_UINT32_TO_UINT64( i_enable, l_count)); l_errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_LOW); errlCommit( l_errl, RUNTIME_COMP_ID ); } // Only call the HWP if 0-->1 or 1-->0 if( ((l_count==0) && i_enable) || ((l_count==1) && !i_enable) ) { // NOTE Regarding the entity type passed to the HWP: // There are 3 independent registers used to trigger a // special wakeup (FSP,HOST,OCC), we are using the FSP // bit because HOST/OCC are already in use. p9specialWakeup::PROC_SPCWKUP_OPS l_spcwkupType; if(i_enable) { l_spcwkupType = p9specialWakeup::SPCWKUP_ENABLE; } else { l_spcwkupType = p9specialWakeup::SPCWKUP_DISABLE; } if(l_type == TARGETING::TYPE_EQ) { fapi2::Target<fapi2::TARGET_TYPE_EQ> l_fapi_target(const_cast<TARGETING::Target*>(i_target)); FAPI_INVOKE_HWP( l_errl, p9_cpu_special_wakeup_eq, l_fapi_target, l_spcwkupType, p9specialWakeup::FSP ); } else if(l_type == TARGETING::TYPE_EX) { fapi2::Target<fapi2::TARGET_TYPE_EX_CHIPLET> l_fapi_target(const_cast<TARGETING::Target*>(i_target)); FAPI_INVOKE_HWP( l_errl, p9_cpu_special_wakeup_ex, l_fapi_target, l_spcwkupType, p9specialWakeup::FSP ); } else if(l_type == TARGETING::TYPE_CORE) { fapi2::Target<fapi2::TARGET_TYPE_CORE> l_fapi_target(const_cast<TARGETING::Target*>(i_target)); FAPI_INVOKE_HWP( l_errl, p9_cpu_special_wakeup_core, l_fapi_target, l_spcwkupType, p9specialWakeup::FSP ); } if(l_errl) { TRACFCOMP( g_trac_scom, "p9_cpu_special_wakeup ERROR :" " Returning errorlog, reason=0x%x", l_errl->reasonCode() ); // Capture the target data in the elog ERRORLOG::ErrlUserDetailsTarget(i_target).addToLog( l_errl ); } } // Update the counter if(!l_errl) { if(i_enable) { l_count++; } else { l_count--; } i_target->setAttr<ATTR_SPCWKUP_COUNT>(l_count); } } return l_errl; }
/** * @brief Stops OCCs on all Processors in the node */ errlHndl_t stopAllOCCs() { TRACFCOMP( g_fapiTd,ENTER_MRK"stopAllOCCs" ); errlHndl_t l_errl = NULL; bool winkle_loaded = false; do { #ifndef __HOSTBOOT_RUNTIME //OCC requires the build_winkle_images library if ( !VFS::module_is_loaded( "libbuild_winkle_images.so" ) ) { l_errl = VFS::module_load( "libbuild_winkle_images.so" ); if ( l_errl ) { // load module returned with errl set TRACFCOMP( g_fapiTd,ERR_MRK"loadnStartAllOccs: Could not load build_winkle module" ); // break from do loop if error occured break; } winkle_loaded = true; } #endif TargetHandleList procChips; getAllChips(procChips, TYPE_PROC, true); if(procChips.size() == 0) { TRACFCOMP( g_fapiTd,INFO_MRK"loadnStartAllOccs: No processors found" ); //We'll never get this far in the IPL without any processors, // so just exit. break; } TRACFCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: %d procs found", procChips.size()); //The OCC Procedures require processors within a DCM be //setup together. If DCM installed is set, we work under //the assumption that each nodeID is a DCM. So sort the //list by NodeID then call OCC Procedures on NodeID pairs. std::sort(procChips.begin(), procChips.end(), orderByNodeAndPosition); //The OCC master for the node must be reset last. For all //OP systems there is only a single OCC that can be the //master so it is safe to look at the MASTER_CAPABLE flag. Target* masterProc0 = NULL; Target* masterProc1 = NULL; TargetHandleList::iterator itr1 = procChips.begin(); if(0 == (*itr1)->getAttr<ATTR_PROC_DCM_INSTALLED>()) { TRACUCOMP( g_fapiTd, INFO_MRK"stopAllOCCs: non-dcm path entered"); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { TargetHandleList pOccs; getChildChiplets(pOccs, *itr, TYPE_OCC); if (pOccs.size() > 0) { if( pOccs[0]->getAttr<ATTR_OCC_MASTER_CAPABLE>() ) { masterProc0 = *itr; continue; } } l_errl = HBOCC::stopOCC( *itr, NULL ); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed"); errlCommit (l_errl, HWPF_COMP_ID); // just commit and try the next chip } } if (l_errl) { break; } } else { TRACFCOMP( g_fapiTd, INFO_MRK"stopAllOCCs: Following DCM Path"); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { Target* targ0 = *itr; Target* targ1 = NULL; TRACFCOMP( g_fapiImpTd, INFO_MRK"stopAllOCCs: Cur target nodeID=%d", targ0->getAttr<ATTR_FABRIC_NODE_ID>()); //if the next target in the list is in the same node // they are on the same DCM, so bump itr forward // and update targ0 pointer if((itr+1) != procChips.end()) { TRACFCOMP( g_fapiImpTd, INFO_MRK"stopAllOCCs: n+1 target nodeID=%d", ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>())); if((targ0->getAttr<ATTR_FABRIC_NODE_ID>()) == ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>())) { //need to flip the numbers because we were reversed targ1 = targ0; itr++; targ0 = *itr; } } TargetHandleList pOccs; getChildChiplets(pOccs, targ0, TYPE_OCC); if (pOccs.size() > 0) { if( pOccs[0]->getAttr<ATTR_OCC_MASTER_CAPABLE>() ) { masterProc0 = targ0; masterProc1 = targ1; continue; } } l_errl = HBOCC::stopOCC( targ0, targ1 ); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed"); errlCommit (l_errl, HWPF_COMP_ID); // just commit and try the next module } } if (l_errl) { break; } } //now do the master OCC if( masterProc0 ) { l_errl = HBOCC::stopOCC( masterProc0, masterProc1 ); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed on master"); break; } } } while(0); //make sure we always unload the module if we loaded it if (winkle_loaded) { #ifndef __HOSTBOOT_RUNTIME errlHndl_t l_tmpErrl = VFS::module_unload( "libbuild_winkle_images.so" ); if ( l_tmpErrl ) { TRACFCOMP( g_fapiTd,ERR_MRK"stopAllOCCs: Error unloading build_winkle module" ); if(l_errl) { errlCommit( l_tmpErrl, HWPF_COMP_ID ); } else { l_errl = l_tmpErrl; } } #endif } TRACFCOMP( g_fapiTd,EXIT_MRK"stopAllOCCs" ); return l_errl; }
errlHndl_t ErrDataService::GenerateSrcPfa( ATTENTION_TYPE i_attnType, ServiceDataCollector & io_sdc, bool & o_initiateHwudump, TargetHandle_t & o_dumpTrgt, errlHndl_t & o_dumpErrl, uint32_t & o_dumpErrlActions) { #define PRDF_FUNC "[ErrDataService::GenerateSrcPfa] " o_initiateHwudump = false; o_dumpTrgt = NULL; o_dumpErrl = NULL; o_dumpErrlActions = 0; // First, check if an error log should be committed. Note that there should // always be an error log if there was a system or unit checkstop. if ( io_sdc.queryDontCommitErrl() && MACHINE_CHECK != i_attnType && !io_sdc.IsUnitCS() ) { // User did not want this error log committed. No need to continue. So // delete it and exit. delete iv_errl; iv_errl = NULL; return NULL; } #ifdef __HOSTBOOT_MODULE using namespace ERRORLOG; using namespace HWAS; #else uint8_t sdcSaveFlags = SDC_NO_SAVE_FLAGS; size_t sz_uint8 = sizeof(uint8_t); #endif epubProcedureID thisProcedureID; bool ForceTerminate = false; bool iplDiagMode = false; ++iv_serviceActionCounter; uint16_t PRD_Reason_Code = 0; //************************************************************** // Callout loop to set up Reason code and SRC word 9 //************************************************************** // Must go thru callout list to look for RIOPORT procedure callouts, // since they require the port info to be in SRC Word 9 bool HW = false; bool SW = false; bool SW_High = false; bool SecondLevel = false; uint32_t SrcWord7 = 0; uint32_t SrcWord9 = 0; // Should not gard hardware if there is a hardware callout at LOW priority // and a symbolic FRU indicating a possibility of a software error at MED or // HIGH priority. bool sappSwNoGardReq = false, sappHwNoGardReq = false; const SDC_MRU_LIST & mruList = io_sdc.getMruList(); int32_t calloutsPlusDimms = mruList.size(); for ( SDC_MRU_LIST::const_iterator it = mruList.begin(); it < mruList.end(); ++it ) { PRDcallout thiscallout = it->callout; if ( PRDcalloutData::TYPE_SYMFRU == thiscallout.getType() ) { if ( (SP_CODE == thiscallout.flatten()) || (SYS_SW_CODE == thiscallout.flatten()) ) { SW = true; if ( MRU_LOW != it->priority ) { sappSwNoGardReq = true; } if ( MRU_MED == it->priority ) { SW_High = true; } } else if ( LEVEL2_SUPPORT == thiscallout.flatten()) { SecondLevel = true; if ( MRU_LOW != it->priority ) { sappSwNoGardReq = true; } } } else if ( PRDcalloutData::TYPE_MEMMRU == thiscallout.getType() ) { MemoryMru memMru (thiscallout.flatten()); SrcWord9 = memMru.toUint32(); // Get MemMru value TargetHandleList partList = memMru.getCalloutList(); uint32_t partCount = partList.size(); calloutsPlusDimms = calloutsPlusDimms + partCount -1; HW = true; //hardware callout if ( MRU_LOW == it->priority ) { sappHwNoGardReq = true; } } else // PRDcalloutData::TYPE_TARGET { HW = true; // Hardware callout // Determines if all the hardware callouts have low priority. if ( MRU_LOW == it->priority ) { sappHwNoGardReq = true; } } } //////////////////////////////////////////////////////////////// //Set the PRD Reason Code based on the flags set in the above callout loop. //////////////////////////////////////////////////////////////// if (HW == true && SW == true) { if (SW_High == true) PRD_Reason_Code = PRDF_DETECTED_FAIL_SOFTWARE_PROBABLE; else PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE; } else if (HW == true && SW == false && SecondLevel == true) PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE; else if (HW == true && SW == false && SecondLevel == false) PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE; else if (HW == false && SW == true) PRD_Reason_Code = PRDF_DETECTED_FAIL_SOFTWARE; else { // If we get here both HW and SW flags were false. Callout may be // Second Level Support only, or a procedure not checked in the SW // flag code. PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE; } SrcWord7 = io_sdc.getPrimaryAttnType() << 8; SrcWord7 |= io_sdc.getSecondaryAttnType(); //-------------------------------------------------------------------------- // Check for IPL Diag Mode //-------------------------------------------------------------------------- #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) iplDiagMode = PlatServices::isInMdiaMode(); #endif //************************************************************** // Update Error Log with SRC //************************************************************** ErrorSignature * esig = io_sdc.GetErrorSignature(); updateSrc( esig->getChipId(), SrcWord7, esig->getSigId(), SrcWord9, PRD_Reason_Code); //************************************************************** // Add SDC Capture data to Error Log User Data here only if // there are 4 or more callouts, // (including Dimm callouts in the MemoryMru). //************************************************************** bool capDataAdded = false; if (calloutsPlusDimms > 3) { AddCapData( io_sdc.GetCaptureData(), iv_errl ); AddCapData( io_sdc.getTraceArrayData(), iv_errl ); capDataAdded = true; } //-------------------------------------------------------------------------- // Set the error log severity and get the error log action flags. //-------------------------------------------------------------------------- // Let's assume the default is the action for a system checkstop. #ifdef __HOSTBOOT_MODULE errlSeverity_t errlSev = ERRL_SEV_UNRECOVERABLE; #else errlSeverity errlSev = ERRL_SEV_UNRECOVERABLE; #endif uint32_t errlAct = ERRL_ACTION_SA | // Service action required. ERRL_ACTION_REPORT | // Report to HMC and hypervisor. ERRL_ACTION_CALL_HOME; // Call home. if ( MACHINE_CHECK != i_attnType ) // Anything other that a system checkstop { if ( io_sdc.queryServiceCall() ) // still a serviceable event { errlSev = ERRL_SEV_PREDICTIVE; } else // not a serviceable event { errlSev = io_sdc.queryLogging() ? ERRL_SEV_RECOVERED // should still be logged : ERRL_SEV_INFORMATIONAL; // can be ignored errlAct = ERRL_ACTION_HIDDEN; } } // This needs to be done after setting the SRCs otherwise it will be // overridden. iv_errl->setSev( errlSev ); // Add procedure callout for SUE attentions. The intent is to make sure the // customer looks for other service actions before replacing parts for this // attention. if ( io_sdc.IsSUE() ) { PRDF_HW_ADD_PROC_CALLOUT( SUE_PREV_ERR, MRU_HIGH, iv_errl, errlSev ); } //-------------------------------------------------------------------------- // Get the global gard policy. //-------------------------------------------------------------------------- HWAS::GARD_ErrorType gardPolicy = HWAS::GARD_NULL; // Gard only if the error is a serviceable event. if ( io_sdc.queryServiceCall() ) { // We will not Resource Recover on a checkstop attention. gardPolicy = ( MACHINE_CHECK == i_attnType ) ? HWAS::GARD_Fatal : HWAS::GARD_Predictive; } if ( io_sdc.IsSUE() && ( MACHINE_CHECK == i_attnType ) ) { // If we are logging an error for an SUE consumed, we should not // perform any GARD here. Appropriate resources should have already // been GARDed for the original UE. gardPolicy = HWAS::GARD_NULL; } // Apply special policies for OPAL. if ( isHyprConfigOpal() && // OPAL is used !isMfgAvpEnabled() && !isMfgHdatAvpEnabled() ) // No AVPs running { // OPAL has requested that we disable garding for predictive errors // found at runtime. if ( HWAS::GARD_Predictive == gardPolicy ) { #if !defined(__HOSTBOOT_MODULE) // FSP only if ( isHyprRunning() ) gardPolicy = HWAS::GARD_NULL; #elif defined(__HOSTBOOT_RUNTIME) // HBRT only gardPolicy = HWAS::GARD_NULL; #endif } // OPAL has requested that we diable garding for fatal errors (system // checkstops) that could have been caused by a software generated // attention at runtime. This will be determined if there is a software // callout with higher priority than a hardware callout. else if ( HWAS::GARD_Fatal == gardPolicy && sappSwNoGardReq && sappHwNoGardReq ) // Gard requirements met { #if !defined(__HOSTBOOT_MODULE) // FSP only if ( isHyprRunning() ) gardPolicy = HWAS::GARD_NULL; #elif !defined(__HOSTBOOT_RUNTIME) // Hostboot only #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS // Checkstop analysis is only done at the beginning of the IPL, // regardless if the checkstop actually occurred during the IPL // or at runtime. We will need to check the IPL state in FIR // data to determine when the checkstop occurred. // Get access to IPL state info from the FIR data in the PNOR. if ( !(PnorFirDataReader::getPnorFirDataReader().isIplState()) ) gardPolicy = HWAS::GARD_NULL; #endif #endif } } //-------------------------------------------------------------------------- // Get the global deconfig policy (must be done after setting gard policy). //-------------------------------------------------------------------------- HWAS::DeconfigEnum deconfigPolicy = HWAS::NO_DECONFIG; bool deferDeconfig = false; if ( HWAS::GARD_NULL != gardPolicy ) { #if !defined(__HOSTBOOT_MODULE) // FSP only // Change the deconfig state based the gard type. This is only required // to control what the FSP does during the reconfig loop. deconfigPolicy = HWSV::SvrError::isInHwReconfLoop() ? HWAS::DECONFIG : HWAS::NO_DECONFIG; #elif !defined(__HOSTBOOT_RUNTIME) // Hostboot only // Deferred Deconfig should be used throughout all of Hostboot (both // checkForIplAttns() and MDIA). deconfigPolicy = HWAS::DECONFIG; deferDeconfig = true; #endif } //-------------------------------------------------------------------------- // Get the HCDB diagnostics policy. //-------------------------------------------------------------------------- HWSV::hwsvDiagUpdate l_diagUpdate = HWSV::HWSV_DIAG_NEEDED; if ( ERRL_ACTION_HIDDEN == errlAct ) { // Diagnostics is not needed in the next IPL cycle for non-visible logs. l_diagUpdate = HWSV::HWSV_DIAG_NOT_NEEDED; } //-------------------------------------------------------------------------- // Initialize the PFA data //-------------------------------------------------------------------------- PfaData pfaData; initPfaData( io_sdc, i_attnType, deferDeconfig, errlAct, errlSev, gardPolicy, pfaData, o_dumpTrgt ); //-------------------------------------------------------------------------- // Add each mru/callout to the error log. //-------------------------------------------------------------------------- for ( SDC_MRU_LIST::const_iterator it = mruList.begin(); it < mruList.end(); ++it ) { PRDcallout thiscallout = it->callout; PRDpriority thispriority = it->priority; // Use the global gard/deconfig policies as default. HWAS::GARD_ErrorType thisGard = gardPolicy; HWAS::DeconfigEnum thisDeconfig = deconfigPolicy; // Change the gard/deconfig actions if this MRU should not be garded. if ( NO_GARD == it->gardState ) { thisGard = HWAS::GARD_NULL; thisDeconfig = HWAS::NO_DECONFIG; } // Add the callout to the PFA data addCalloutToPfaData( pfaData, thiscallout, thispriority, thisGard ); // Add callout based on callout type. if( PRDcalloutData::TYPE_TARGET == thiscallout.getType() ) { PRDF_HW_ADD_CALLOUT(thiscallout.getTarget(), thispriority, thisDeconfig, iv_errl, thisGard, errlSev, l_diagUpdate); } else if(PRDcalloutData::TYPE_PROCCLK == thiscallout.getType() || PRDcalloutData::TYPE_PCICLK == thiscallout.getType()) { PRDF_ADD_CLOCK_CALLOUT(iv_errl, thiscallout.getTarget(), thiscallout.getType(), thispriority, thisDeconfig, thisGard); } else if ( PRDcalloutData::TYPE_MEMMRU == thiscallout.getType() ) { MemoryMru memMru (thiscallout.flatten()); TargetHandleList partList = memMru.getCalloutList(); for ( TargetHandleList::iterator it = partList.begin(); it != partList.end(); it++ ) { PRDF_HW_ADD_CALLOUT( *it, thispriority, thisDeconfig, iv_errl, thisGard, errlSev, l_diagUpdate ); } } else if ( PRDcalloutData::TYPE_SYMFRU == thiscallout.getType() ) { thisProcedureID = epubProcedureID(thiscallout.flatten()); PRDF_DTRAC( PRDF_FUNC "thisProcedureID: %x, thispriority: %x, " "errlSev: %x", thisProcedureID, thispriority,errlSev ); PRDF_HW_ADD_PROC_CALLOUT(thisProcedureID, thispriority, iv_errl, errlSev); // Use the flags set earlier to determine if the callout is just // Software (SP code or Phyp Code). Add a Second Level Support // procedure callout Low, for this case. if (HW == false && SW == true && SecondLevel == false) { PRDF_DTRAC( PRDF_FUNC "thisProcedureID= %x, thispriority=%x, " "errlSev=%x", LEVEL2_SUPPORT, MRU_LOW, errlSev ); PRDF_HW_ADD_PROC_CALLOUT( LEVEL2_SUPPORT, MRU_LOW, iv_errl, errlSev ); SecondLevel = true; } } } // Send the dynamic memory Dealloc message for DIMMS for Predictive // callouts. // We can not check for ERRL severity here as there are some cases // e.g. DD02 where we create a Predictive error log but callouts // are not predictive. if ( HWAS::GARD_Predictive == gardPolicy ) { deallocateDimms( mruList ); } //************************************************************** // Check for Terminating the system for non mnfg conditions. //************************************************************** ForceTerminate = checkForceTerm( io_sdc, o_dumpTrgt, pfaData ); //************************************************************* // Check for Manufacturing Mode terminate here and then do // the needed overrides on ForceTerminate flag. //************************************************************* if ( PlatServices::mnfgTerminate() && !ForceTerminate ) { ForceTerminate = true; if ( !((errlSev == ERRL_SEV_RECOVERED) || (errlSev == ERRL_SEV_INFORMATIONAL)) && iplDiagMode && !HW ) { //Terminate in Manufacturing Mode, in IPL mode, for visible log, with no HW callouts. PRDF_SRC_WRITE_TERM_STATE_ON(iv_errl, SRCI_TERM_STATE_MNFG); } // Do not terminate if recoverable or informational. // Do not terminate if deferred deconfig. else if ( deferDeconfig || (errlSev == ERRL_SEV_RECOVERED ) || (errlSev == ERRL_SEV_INFORMATIONAL) ) { ForceTerminate = false; errlAct |= ERRL_ACTION_DONT_TERMINATE; } else { PRDF_SRC_WRITE_TERM_STATE_ON(iv_errl, SRCI_TERM_STATE_MNFG); } pfaData.errlActions = errlAct; } // Needed to move the errl add user data sections here because of some updates // of the data required in the Aysnc section for the SMA dual reporting fix. //************************************************************** // Add the PFA data to Error Log User Data //************************************************************** UtilMem l_membuf; l_membuf << pfaData; PRDF_ADD_FFDC( iv_errl, (const char*)l_membuf.base(), l_membuf.size(), ErrlVer1, ErrlSectPFA5_1 ); //************************************************************** // Add SDC Capture data to Error Log User Data //************************************************************** // Pulled some code out to incorporate into AddCapData // Check to make sure Capture Data wasn't added earlier. if (!capDataAdded) { AddCapData( io_sdc.GetCaptureData(), iv_errl ); AddCapData( io_sdc.getTraceArrayData(), iv_errl ); } //************************************************************************** // Add extended MemoryMru error log sections (if needed). //************************************************************************** for ( SDC_MRU_LIST::const_iterator it = mruList.begin(); it < mruList.end(); ++it ) { // Operate only on MemoryMru callouts. if ( PRDcalloutData::TYPE_MEMMRU != it->callout.getType() ) continue; /* TODO RTC 136125 // Only add single DIMM callouts. Otherwise, the parsed data is // redundant. MemoryMru memMru ( it->callout.flatten() ); if ( !memMru.getSymbol().isValid() ) continue; // Add the MemoryMru to the capture data. CenMbaCaptureData::addExtMemMruData( memMru, iv_errl ); */ } //************************************************************************** // Additional FFDC //************************************************************************** // For OP checkstop analysis, add a string indicating a system checkstop // occurred and when. This will be printed out in the console traces along // with the error log. #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) // IPL only #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS if ( MACHINE_CHECK == i_attnType ) { const char * const str = PnorFirDataReader::getPnorFirDataReader().isIplState() ? "System checkstop occurred during IPL on previous boot" : "System checkstop occurred during runtime on previous boot"; ErrlUserDetailsString(str).addToLog(iv_errl); } #endif #endif // Collect PRD traces. // NOTE: Each line of a trace is on average 36 bytes so 768 bytes should get // us around 21 lines of trace output. PRDF_COLLECT_TRACE(iv_errl, 768); //************************************************************** // Commit the error log. // This will also perform Gard and Deconfig actions. // Do the Unit Dumps if needed. //************************************************************** // Add the MNFG trace information. MnfgTrace( io_sdc.GetErrorSignature(), pfaData ); // If this is not a terminating condition, commit the error log. If the // error log is not committed, the error log will be passed back to // PRDF::main() and eventually ATTN. if ( MACHINE_CHECK != pfaData.priAttnType && !ForceTerminate && !pfaData.TERMINATE ) { // Handle any unit checkstop conditions, if needed (i.e. runtime // deconfiguration, dump/FFDC collection, etc. if ( io_sdc.IsUnitCS() && !io_sdc.IsUsingSavedSdc() ) { handleUnitCS( io_sdc, o_dumpTrgt, o_initiateHwudump ); } if ( true == o_initiateHwudump ) { // the dump log will be deleted later in PRDF::main // after the hwudump is initiated there. o_dumpErrl = iv_errl; iv_errl = NULL; o_dumpErrlActions = errlAct; PRDF_TRAC( PRDF_FUNC "for target: 0x%08x, i_errl: 0x%08x, " "i_errlActions: 0x%08x", getHuid(o_dumpTrgt), ERRL_GETRC_SAFE(o_dumpErrl), o_dumpErrlActions ); } else { // Commit the error log. commitErrLog( iv_errl, pfaData ); } } #ifndef __HOSTBOOT_MODULE errlHndl_t reg_errl = UtilReg::read ("prdf/RasServices", &sdcSaveFlags, sz_uint8); if (reg_errl) { PRDF_ERR( PRDF_FUNC "Failure in SDC Sync flag Registry read" ); PRDF_COMMIT_ERRL(reg_errl, ERRL_ACTION_REPORT); } else { //Turn off indicator that there is saved Sdc Analysis info sdcSaveFlags &= ( ~SDC_ANALYSIS_SAVE_FLAG ); reg_errl = UtilReg::write ("prdf/RasServices", &sdcSaveFlags, sz_uint8); if (reg_errl) { PRDF_ERR( PRDF_FUNC "Failure in SDC Sync flag Registry write" ); PRDF_COMMIT_ERRL(reg_errl, ERRL_ACTION_REPORT); } } #endif PRDF_INF( PRDF_FUNC "PRD called to analyze an error: 0x%08x 0x%08x", esig->getChipId(), esig->getSigId() ); // Reset iv_errl to NULL. This is done to catch logical bug in our code. // It enables us to assert in createInitialErrl function if iv_errl is // not NULL which should catch any logical bug in initial stages of testing. errlHndl_t o_errl = iv_errl; iv_errl = NULL; return o_errl; #undef PRDF_FUNC }
errlHndl_t runStep(const TargetHandleList & i_targetList) { MDIA_FAST("memory diagnostics entry with %d target(s)", i_targetList.size()); // memory diagnostics ipl step entry point errlHndl_t err = 0; Globals globals; TargetHandle_t top = 0; targetService().getTopLevelTarget(top); if(top) { globals.mfgPolicy = top->getAttr<ATTR_MNFG_FLAGS>(); uint8_t maxMemPatterns = top->getAttr<ATTR_RUN_MAX_MEM_PATTERNS>(); // This registry / attr is the same as the // exhaustive mnfg one if(maxMemPatterns) { globals.mfgPolicy |= MNFG_FLAG_ENABLE_EXHAUSTIVE_PATTERN_TEST; } globals.simicsRunning = Util::isSimicsRunning(); globals.disableScrubs = top->getAttr<ATTR_DISABLE_SCRUB_AFTER_PATTERN_TEST>(); } // get the workflow for each target mba passed in. // associate each workflow with the target handle. WorkFlowAssocMap list; TargetHandleList::const_iterator tit; DiagMode mode; for(tit = i_targetList.begin(); tit != i_targetList.end(); ++tit) { err = getMbaDiagnosticMode(globals, *tit, mode); if(err) { break; } err = getMbaWorkFlow(mode, list[*tit], globals); if(err) { break; } } if(!err) { // set global data Singleton<StateMachine>::instance().setGlobals(globals); // TODO...run the workflow through the state machine err = Singleton<StateMachine>::instance().run(list); } // ensure threads and pools are shutdown when finished doStepCleanup(globals); return err; }
void ServiceCommon::processAttentions(const TargetHandleList & i_procs) { errlHndl_t err = NULL; AttentionList attentions; // this should be the opposite of what we used for masking in preAck uint64_t restoreMask = ~(HostMask::host() | HostMask::nonHost()); ProcOps & procOps = getProcOps(); do { attentions.clear(); // enumerate the highest priority pending attention // on every chip and then give the entire set to PRD TargetHandleList::const_iterator pit = i_procs.end(); while(pit-- != i_procs.begin()) { // enumerate proc local attentions (xstp,spcl,rec). err = procOps.resolveIpoll(*pit, attentions); if(err) { errlCommit(err, ATTN_COMP_ID); } } err = getPrdWrapper().callPrd(attentions); if(err) { errlCommit(err, ATTN_COMP_ID); } // unmask proc local attentions // (xstp,rec,special) in ipoll mask // any pending attentions will be found // on the next pass pit = i_procs.end(); while(pit-- != i_procs.begin()) { mutex_lock(&iv_mutex); // the other thread might be trying to mask // on the same target. The mutex ensures // neither thread corrupts the register. err = modifyScom( *pit, IPOLL::address, restoreMask, SCOM_AND); mutex_unlock(&iv_mutex); ATTN_TRACE("ProcessATTN:: IPOLL RESTORE :%llx", restoreMask ); if(err) { errlCommit(err, ATTN_COMP_ID); } } // if on a given Centaur with a pending attention // on an MBA, an attention comes on in the other MBA // we don't get an interrupt for that. So make another // pass and check for that. } while(!attentions.empty()); }
/** * @brief Starts OCCs on all Processors in the node * This is intended to be used for AVP testing. * * @param[out] o_failedOccTarget: Pointer to the target failing * loadnStartAllOccs * @param[in] i_useSRAM: bool - use SRAM for OCC image, ie during IPL * true if during IPL, false if at end of IPL (default) * @return errlHndl_t Error log if OCC load failed */ errlHndl_t loadnStartAllOccs(TARGETING::Target *& o_failedOccTarget, bool i_useSRAM) { errlHndl_t l_errl = NULL; void* homerVirtAddrBase = NULL; uint64_t homerPhysAddrBase = VMM_HOMER_REGION_START_ADDR; bool winkle_loaded = false; TRACUCOMP( g_fapiTd, ENTER_MRK"loadnStartAllOccs(%d)", i_useSRAM); do { #ifndef __HOSTBOOT_RUNTIME //OCC requires the build_winkle_images library if ( !VFS::module_is_loaded( "libbuild_winkle_images.so" ) ) { l_errl = VFS::module_load( "libbuild_winkle_images.so" ); if ( l_errl ) { // load module returned with errl set TRACFCOMP( g_fapiTd,ERR_MRK"loadnStartAllOccs: Could not load build_winkle module" ); // break from do loop if error occured break; } winkle_loaded = true; } assert(VMM_HOMER_REGION_SIZE <= THIRTYTWO_GB, "loadnStartAllOccs: Unsupported HOMER Region size"); if (!i_useSRAM) { //If running Sapphire need to place this at the top of memory if(TARGETING::is_sapphire_load()) { homerPhysAddrBase = TARGETING::get_top_mem_addr(); assert (homerPhysAddrBase != 0, "loadnStartAllOccs: Top of memory was 0!"); homerPhysAddrBase -= VMM_ALL_HOMER_OCC_MEMORY_SIZE; } TRACFCOMP( g_fapiTd, "HOMER is at %.16X", homerPhysAddrBase ); //Map entire homer region into virtual memory homerVirtAddrBase = mm_block_map(reinterpret_cast<void*>(homerPhysAddrBase), VMM_HOMER_REGION_SIZE); TRACFCOMP( g_fapiTd, "HOMER virtaddrbase %.16X", homerVirtAddrBase ); } else { // malloc space big enough for all of OCC homerVirtAddrBase = (void *)malloc(1 * MEGABYTE); homerPhysAddrBase = mm_virt_to_phys(homerVirtAddrBase); } #endif if (i_useSRAM) { // OCC is going into L3 and SRAM so only need 1 prime and load // into the Master Proc TargetService & tS = targetService(); Target * sysTarget = NULL; tS.getTopLevelTarget( sysTarget ); assert( sysTarget != NULL ); Target* masterproc = NULL; tS.masterProcChipTargetHandle( masterproc ); /******* SETUP AND LOAD **************/ l_errl = primeAndLoadOcc (masterproc, homerVirtAddrBase, homerPhysAddrBase, i_useSRAM); if(l_errl) { o_failedOccTarget = masterproc; TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs:primeAndLoadOcc failed"); free(homerVirtAddrBase); break; } /********* START OCC *************/ l_errl = HBOCC::startOCC (masterproc, NULL, o_failedOccTarget); // it either started or errored; either way, free the memory free(homerVirtAddrBase); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"loadnStartAllOccs: startOCC failed"); break; } } else { TargetHandleList procChips; getAllChips(procChips, TYPE_PROC, true); if(procChips.size() == 0) { TRACFCOMP( g_fapiTd,INFO_MRK"loadnStartAllOccs: No processors found" ); //We'll never get this far in the IPL without any processors, // so just exit. break; } TRACUCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: %d procs found", procChips.size()); TargetHandleList::iterator itr1 = procChips.begin(); //The OCC Procedures require processors within a DCM be //setup together. So, first checking if any proc has //DCM installed attribute set. If not, we can iterate //over the list in any order. //If DCM installed is set, we work under the assumption //that each nodeID is a DCM. So sort the list by NodeID //then call OCC Procedures on NodeID pairs. if(0 == (*itr1)->getAttr<ATTR_PROC_DCM_INSTALLED>()) { TRACUCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: non-dcm path entered"); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { /******* SETUP AND LOAD **************/ l_errl = primeAndLoadOcc (*itr, homerVirtAddrBase, homerPhysAddrBase, i_useSRAM); if(l_errl) { o_failedOccTarget = *itr; TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs:primeAndLoadOcc failed"); break; } /********* START OCC *************/ l_errl = HBOCC::startOCC (*itr, NULL, o_failedOccTarget); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"loadnStartAllOccs: startOCC failed"); break; } } if (l_errl) { break; } } else { TRACUCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: Following DCM Path"); std::sort(procChips.begin(), procChips.end(), orderByNodeAndPosition); TRACUCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: procChips list sorted"); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { TRACUCOMP( g_fapiImpTd, INFO_MRK"loadnStartAllOccs: Insepcting first target" ); Target* targ0 = *itr; Target* targ1 = NULL; TRACUCOMP( g_fapiImpTd, INFO_MRK "loadnStartAllOccs: Cur target nodeID=%d", targ0->getAttr<ATTR_FABRIC_NODE_ID>()); //if the next target in the list is in the same node // they are on the same DCM, so bump itr forward // and update targ1 pointer if((itr+1) != procChips.end()) { TRACUCOMP( g_fapiImpTd, INFO_MRK "loadnStartAllOccs: n+1 target nodeID=%d", ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>()) ); if((targ0->getAttr<ATTR_FABRIC_NODE_ID>()) == ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>())) { itr++; targ1 = *itr; } } /********** Setup and load targ0 ***********/ l_errl = primeAndLoadOcc (targ0, homerVirtAddrBase, homerPhysAddrBase, i_useSRAM); if(l_errl) { o_failedOccTarget = targ0; TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs: " "primeAndLoadOcc failed on targ0"); break; } /*********** Setup and load targ1 **********/ l_errl = primeAndLoadOcc (targ1, homerVirtAddrBase, homerPhysAddrBase, i_useSRAM); if(l_errl) { o_failedOccTarget = targ1; TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs: " "primeAndLoadOcc failed on targ1"); break; } /*********** Start OCC *******************/ l_errl = HBOCC::startOCC (targ0, targ1, o_failedOccTarget); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs: start failed"); break; } } if (l_errl) { break; } } } } while(0); errlHndl_t l_tmpErrl = NULL; //Unless HTMGT is in use, there are no further accesses to HOMER memory //required during the IPL #ifndef CONFIG_HTMGT if(homerVirtAddrBase) { int rc = 0; rc = mm_block_unmap(homerVirtAddrBase); if (rc != 0) { /*@ * @errortype * @moduleid fapi::MOD_OCC_LOAD_START_ALL_OCCS * @reasoncode fapi::RC_MM_UNMAP_ERR * @userdata1 Return Code * @userdata2 Unmap address * @devdesc mm_block_unmap() returns error * @custdesc A problem occurred during the IPL * of the system. */ l_tmpErrl = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, fapi::MOD_OCC_LOAD_START_ALL_OCCS, fapi::RC_MM_UNMAP_ERR, rc, reinterpret_cast<uint64_t> (homerVirtAddrBase)); if(l_tmpErrl) { if(l_errl) { errlCommit( l_tmpErrl, HWPF_COMP_ID ); } else { l_errl = l_tmpErrl; } } } } #endif //make sure we always unload the module if (winkle_loaded) { l_tmpErrl = VFS::module_unload( "libbuild_winkle_images.so" ); if ( l_tmpErrl ) { TRACFCOMP ( g_fapiTd,ERR_MRK "loadnStartAllOccs: Error unloading build_winkle module" ); if(l_errl) { errlCommit( l_tmpErrl, HWPF_COMP_ID ); } else { l_errl = l_tmpErrl; } } } TRACUCOMP( g_fapiTd, EXIT_MRK"loadnStartAllOccs" ); return l_errl; }