TargetHandle_t TargetServiceImpl::getMcs( TargetHandle_t i_proc, uint64_t i_pos) { PredicateCTM classTypeMatch(CLASS_UNIT, TYPE_MCS); PredicateIsFunctional functionalMatch; PredicatePostfixExpr pred; class ChipUnitMatch : public PredicateBase { uint8_t iv_pos; public: bool operator()(const Target * i_target) const { uint8_t pos; bool match = false; if(i_target->tryGetAttr<ATTR_CHIP_UNIT>(pos)) { match = iv_pos == pos; } return match; } explicit ChipUnitMatch(uint8_t i_pos) : iv_pos(i_pos) {} } chipUnitMatch(i_pos); pred.push(&classTypeMatch).push( &functionalMatch).And(); pred.push(&chipUnitMatch).And(); TargetHandleList list; TargetHandle_t mcs = NULL; targetService().getAssociated( list, i_proc, TARGETING::TargetService::CHILD_BY_AFFINITY, TARGETING::TargetService::ALL, &pred); if(list.size() == 1) { mcs = list[0]; } return mcs; }
/** * @brief Optional plugin function called after analysis is complete but * before PRD exits. * @param i_cenChip A Centaur MBA chip. * @param i_sc The step code data struct. * @note This is especially useful for any analysis that still needs to be * done after the framework clears the FIR bits that were at attention. * @return SUCCESS. */ int32_t PllPostAnalysis( ExtensibleChip * i_cenChip, STEP_CODE_DATA_STRUCT & i_sc ) { #define PRDF_FUNC "[Membuf::PllPostAnalysis] " int32_t o_rc = SUCCESS; TargetHandle_t cenTrgt = i_cenChip->GetChipHandle(); do { // need to clear associated bits in the MCIFIR bits. o_rc = MemUtils::mcifirCleanup( i_cenChip, i_sc ); if( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"mcifirCleanup() failed"); break; } // Check to make sure we are at threshold and have something garded. if ( !i_sc.service_data->IsAtThreshold() || (GardAction::NoGard == i_sc.service_data->QueryGard()) ) { break; // nothing to do } TargetHandleList list = getConnected( cenTrgt, TYPE_MBA ); if ( 0 == list.size() ) { PRDF_ERR( PRDF_FUNC"getConnected(0x%08x, TYPE_MBA) failed", getHuid(cenTrgt) ); o_rc = FAIL; break; } // Send SKIP_MBA message for each MBA. for ( TargetHandleList::iterator mbaIt = list.begin(); mbaIt != list.end(); ++mbaIt ) { int32_t l_rc = mdiaSendEventMsg( *mbaIt, MDIA::SKIP_MBA ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC"mdiaSendEventMsg(0x%08x, SKIP_MBA) failed", getHuid(*mbaIt) ); o_rc |= FAIL; continue; // keep going } } } while(0); return o_rc; #undef PRDF_FUNC }
TargetHandle_t TargetServiceImpl::getMembuf( TargetHandle_t i_mcs) { TargetHandle_t membuf = NULL; TargetHandleList list; getChildAffinityTargets(list, i_mcs, CLASS_CHIP, TYPE_MEMBUF); if(list.size() == 1) { membuf = list[0]; } return membuf; }
TargetHandle_t TargetServiceImpl::getMcs( TargetHandle_t i_membuf) { TargetHandle_t mcs = NULL; TargetHandleList list; PredicateCTM pred(CLASS_UNIT, TYPE_MCS); targetService().getAssociated( list, i_membuf, TARGETING::TargetService::PARENT_BY_AFFINITY, TARGETING::TargetService::IMMEDIATE, &pred); if(list.size() == 1) { mcs = list[0]; } return mcs; }
TargetHandle_t TargetServiceImpl::getProc( TargetHandle_t i_membuf) { TargetHandle_t proc = NULL; TargetHandleList list; PredicateCTM pred(CLASS_CHIP, TYPE_PROC); targetService().getAssociated( list, i_membuf, TARGETING::TargetService::PARENT_BY_AFFINITY, TARGETING::TargetService::ALL, &pred); if(list.size() == 1) { proc = list[0]; } return proc; }
errlHndl_t startScrub() { #define PRDF_FUNC "[PRDF::startScrub] " PRDF_ENTER( PRDF_FUNC ); errlHndl_t o_errl = NULL; int32_t l_rc = SUCCESS; HUID nodeId = INVALID_HUID; // will unlock when going out of scope PRDF_SYSTEM_SCOPELOCK; do { // Since the last refresh is in istep10 host_prd_hwreconfig, // it may be good to call it again here at istep16 mss_scrub // to remove any non-functional MBAs from PRD system model. o_errl = noLock_refresh(); // This shouldn't return any error but if it does, break out if(NULL != o_errl) { PRDF_ERR( PRDF_FUNC"noLock_refresh() failed" ); break; } // This is run in Hostboot so there should only be one node. TargetHandleList list = getFunctionalTargetList( TYPE_NODE ); if ( 1 != list.size() ) { PRDF_ERR( PRDF_FUNC"getFunctionalTargetList(TYPE_NODE) failed" ); l_rc = FAIL; break; } nodeId = getHuid(list[0]); PRDF_ENTER( PRDF_FUNC"HUID=0x%08x", nodeId ); // Start scrubbing on all MBAs. MbaDomain * domain = (MbaDomain *)systemPtr->GetDomain(MBA_DOMAIN); if ( NULL == domain ) { PRDF_ERR( PRDF_FUNC"MBA_DOMAIN not found. nodeId=0x%08x", nodeId ); l_rc = FAIL; break; } l_rc = domain->startScrub(); PRDF_EXIT( PRDF_FUNC"HUID=0x%08x", nodeId ); } while (0); if (( SUCCESS != l_rc ) && (NULL == o_errl)) { // Get user data uint64_t ud12 = PRDF_GET_UINT64_FROM_UINT32( nodeId, __LINE__ ); uint64_t ud34 = PRDF_GET_UINT64_FROM_UINT32( 0, 0 ); // Create error log o_errl = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, // severity PRDF_START_SCRUB, // module ID PRDF_DETECTED_FAIL_SOFTWARE, // reason code ud12, ud34 ); // user data 1-4 // Add 2nd level support o_errl->addProcedureCallout( EPUB_PRC_LVL_SUPP, SRCI_PRIORITY_HIGH ); // Add traces o_errl->collectTrace( PRDF_COMP_NAME, 512 ); } PRDF_EXIT( PRDF_FUNC ); return o_errl; #undef PRDF_FUNC }
errlHndl_t PegasusConfigurator::addDomainChips( TARGETING::TYPE i_type, RuleChipDomain * io_domain, PllDomainList * io_pllDomains, PllDomainList * io_pllDomains2) { using namespace TARGETING; errlHndl_t l_errl = NULL ; // Get references to factory objects. ScanFacility & scanFac = ScanFacility::Access(); ResolutionFactory & resFac = ResolutionFactory::Access(); // Get all targets of specified type and add to given domain. TargetHandleList list = PlatServices::getFunctionalTargetList( i_type ); if ( 0 == list.size() ) { PRDF_ERR( "[addDomainChips] getFunctionalTargetList " "returned empty list for i_type=%d", i_type ); } else { // Get rule filename based on type. const char * fileName = ""; switch ( i_type ) { case TYPE_PROC: // Check which Proc chip type if (MODEL_NAPLES == getProcModel(list[0])) fileName = NaplesProc; else fileName = MuranoVeniceProc; break; case TYPE_EX: fileName = Ex; break; case TYPE_MCS: fileName = Mcs; break; case TYPE_MEMBUF: fileName = Membuf; break; case TYPE_MBA: fileName = Mba; break; default: // Print a trace statement, but do not fail the build. PRDF_ERR( "[addDomainChips] Unsupported target type: %d", i_type ); } for ( TargetHandleList::const_iterator itr = list.begin(); itr != list.end(); ++itr ) { if ( NULL == *itr ) continue; RuleChip * chip = new RuleChip( fileName, *itr, scanFac, resFac,l_errl ); if( NULL != l_errl ) { delete chip; break; } sysChipLst.push_back( chip ); io_domain->AddChip( chip ); // PLL domains switch ( i_type ) { case TYPE_PROC: addChipsToPllDomain(CLOCK_DOMAIN_FAB, io_pllDomains, chip, *itr, scanFac, resFac); addChipsToPllDomain(CLOCK_DOMAIN_IO, io_pllDomains2, chip, *itr, scanFac, resFac); break; case TYPE_MEMBUF: addChipsToPllDomain(CLOCK_DOMAIN_MEMBUF, io_pllDomains, chip, *itr, scanFac, resFac); break; default: break; } } // Flush rule table cache since objects are all built. Prdr::LoadChipCache::flushCache(); } return l_errl; }
int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc ) { #define PRDF_FUNC "[CenMbaTdCtlr::handleUE] " using namespace CalloutUtil; int32_t o_rc = SUCCESS; iv_tdState = NO_OP; // Abort the TD procedure. setTdSignature( io_sc, PRDFSIG_MaintUE ); io_sc.service_data->SetServiceCall(); CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip ); do { // Clean up the maintenance command. This is needed just in case the UE // isolation procedure is modified to use maintenance commands. o_rc = cleanupPrevCmd(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" ); break; } // Look for all failing bits on this rank. CenDqBitmap bitmap; o_rc = mssIplUeIsolation( iv_mbaTrgt, iv_rank, bitmap ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"mssIplUeIsolation() failed" ); break; } // Add UE data to capture data. bitmap.getCaptureData( io_sc.service_data->GetCaptureData() ); // Callout the failing DIMMs. TargetHandleList callouts; for ( int32_t ps = 0; ps < PORT_SLCT_PER_MBA; ps++ ) { bool badDqs = false; o_rc = bitmap.badDqs( ps, badDqs ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"badDqs(%d) failed", ps ); break; } if ( !badDqs ) continue; // nothing to do. TargetHandleList dimms = getConnectedDimms(iv_mbaTrgt, iv_rank, ps); if ( 0 == dimms.size() ) { PRDF_ERR( PRDF_FUNC"getConnectedDimms(%d) failed", ps ); o_rc = FAIL; break; } callouts.insert( callouts.end(), dimms.begin(), dimms.end() ); if ( isMfgCeCheckingEnabled() ) { // As we are doing callout for UE, we dont need to do callout // during CE for this rank on given port mbadb->getIplCeStats()->banAnalysis( iv_rank, ps ); } } if ( SUCCESS != o_rc ) break; if ( 0 == callouts.size() ) { // It is possible the scrub counters have rolled over to zero due to // a known DD1.0 hardware bug. In this case, the best we can do is // callout both DIMMs, because at minimum we know there was a UE, we // just don't know where. // NOTE: If this condition happens because of a DD2.0+ bug, the // mssIplUeIsolation procedure will callout the Centaur. callouts = getConnectedDimms( iv_mbaTrgt, iv_rank ); if ( 0 == callouts.size() ) { PRDF_ERR( PRDF_FUNC"getConnectedDimms() failed" ); o_rc = FAIL; break; } if ( isMfgCeCheckingEnabled() ) { // As we are doing callout for UE, we dont need to do callout // during CE for this rank on both port mbadb->getIplCeStats()->banAnalysis( iv_rank); } } // Callout all DIMMs in the list. for ( TargetHandleList::iterator i = callouts.begin(); i != callouts.end(); i++ ) { io_sc.service_data->SetCallout( *i, MRU_HIGH ); } } while(0); return o_rc; #undef PRDF_FUNC }
errlHndl_t discoverTargets() { HWAS_INF("discoverTargets entry"); errlHndl_t errl = NULL; // loop through all the targets and set HWAS_STATE to a known default for (TargetIterator target = targetService().begin(); target != targetService().end(); ++target) { HwasState hwasState = target->getAttr<ATTR_HWAS_STATE>(); hwasState.deconfiguredByEid = 0; hwasState.poweredOn = false; hwasState.present = false; hwasState.functional = false; hwasState.dumpfunctional = false; target->setAttr<ATTR_HWAS_STATE>(hwasState); } // Assumptions and actions: // CLASS_SYS (exactly 1) - mark as present // CLASS_ENC, TYPE_PROC, TYPE_MEMBUF, TYPE_DIMM // (ALL require hardware query) - call platPresenceDetect // \->children: CLASS_* (NONE require hardware query) - mark as present do { // find CLASS_SYS (the top level target) Target* pSys; targetService().getTopLevelTarget(pSys); HWAS_ASSERT(pSys, "HWAS discoverTargets: no CLASS_SYS TopLevelTarget found"); // mark this as present enableHwasState(pSys, true, true, 0); HWAS_DBG("pSys %.8X - marked present", pSys->getAttr<ATTR_HUID>()); // find list of all we need to call platPresenceDetect against PredicateCTM predEnc(CLASS_ENC); PredicateCTM predChip(CLASS_CHIP); PredicateCTM predDimm(CLASS_LOGICAL_CARD, TYPE_DIMM); PredicatePostfixExpr checkExpr; checkExpr.push(&predChip).push(&predDimm).Or().push(&predEnc).Or(); TargetHandleList pCheckPres; targetService().getAssociated( pCheckPres, pSys, TargetService::CHILD, TargetService::ALL, &checkExpr ); // pass this list to the hwas platform-specific api where // pCheckPres will be modified to only have present targets HWAS_DBG("pCheckPres size: %d", pCheckPres.size()); errl = platPresenceDetect(pCheckPres); HWAS_DBG("pCheckPres size: %d", pCheckPres.size()); if (errl != NULL) { break; // break out of the do/while so that we can return } // for each, read their ID/EC level. if that works, // mark them and their descendants as present // read the partialGood vector to determine if any are not functional // and read and store values from the PR keyword // list of procs and data that we'll need to look at the PR keyword procRestrict_t l_procEntry; std::vector <procRestrict_t> l_procPRList; // sort the list by ATTR_HUID to ensure that we // start at the same place each time std::sort(pCheckPres.begin(), pCheckPres.end(), compareTargetHuid); for (TargetHandleList::const_iterator pTarget_it = pCheckPres.begin(); pTarget_it != pCheckPres.end(); ++pTarget_it ) { TargetHandle_t pTarget = *pTarget_it; // if CLASS_ENC is still in this list, mark as present if (pTarget->getAttr<ATTR_CLASS>() == CLASS_ENC) { enableHwasState(pTarget, true, true, 0); HWAS_DBG("pTarget %.8X - CLASS_ENC marked present", pTarget->getAttr<ATTR_HUID>()); // on to the next target continue; } bool chipPresent = true; bool chipFunctional = true; uint32_t errlEid = 0; uint16_t pgData[VPD_CP00_PG_DATA_LENGTH / sizeof(uint16_t)]; bzero(pgData, sizeof(pgData)); if (pTarget->getAttr<ATTR_CLASS>() == CLASS_CHIP) { // read Chip ID/EC data from these physical chips errl = platReadIDEC(pTarget); if (errl) { // read of ID/EC failed even tho we THOUGHT we were present. HWAS_INF("pTarget %.8X - read IDEC failed (eid 0x%X) - bad", errl->eid(), pTarget->getAttr<ATTR_HUID>()); // chip NOT present and NOT functional, so that FSP doesn't // include this for HB to process chipPresent = false; chipFunctional = false; errlEid = errl->eid(); // commit the error but keep going errlCommit(errl, HWAS_COMP_ID); // errl is now NULL } else if (pTarget->getAttr<ATTR_TYPE>() == TYPE_PROC) { // read partialGood vector from these as well. errl = platReadPartialGood(pTarget, pgData); if (errl) { // read of PG failed even tho we were present.. HWAS_INF("pTarget %.8X - read PG failed (eid 0x%X)- bad", errl->eid(), pTarget->getAttr<ATTR_HUID>()); chipFunctional = false; errlEid = errl->eid(); // commit the error but keep going errlCommit(errl, HWAS_COMP_ID); // errl is now NULL } else // look at the 'nest' logic to override the functionality // of this proc if (pgData[VPD_CP00_PG_PIB_INDEX] != VPD_CP00_PG_PIB_GOOD) { HWAS_INF("pTarget %.8X - PIB pgPdata[%d]: expected 0x%04X - bad", pTarget->getAttr<ATTR_HUID>(), VPD_CP00_PG_PIB_INDEX, VPD_CP00_PG_PIB_GOOD); chipFunctional = false; } else if (pgData[VPD_CP00_PG_PERVASIVE_INDEX] != VPD_CP00_PG_PERVASIVE_GOOD) { HWAS_INF("pTarget %.8X - Pervasive pgPdata[%d]: expected 0x%04X - bad", pTarget->getAttr<ATTR_HUID>(), VPD_CP00_PG_PERVASIVE_INDEX, VPD_CP00_PG_PERVASIVE_GOOD); chipFunctional = false; } else if ((pgData[VPD_CP00_PG_POWERBUS_INDEX] & VPD_CP00_PG_POWERBUS_BASE) != VPD_CP00_PG_POWERBUS_BASE) { HWAS_INF("pTarget %.8X - PowerBus pgPdata[%d]: expected 0x%04X - bad", pTarget->getAttr<ATTR_HUID>(), VPD_CP00_PG_POWERBUS_INDEX, VPD_CP00_PG_POWERBUS_BASE); chipFunctional = false; } else { // read the PR keywords that we need, so that if // we have errors, we can handle them as approprite. uint8_t prData[VPD_VINI_PR_DATA_LENGTH/sizeof(uint8_t)]; bzero(prData, sizeof(prData)); errl = platReadPR(pTarget, prData); if (errl != NULL) { // read of PR keyword failed HWAS_INF("pTarget %.8X - read PR failed - bad", pTarget->getAttr<ATTR_HUID>()); chipFunctional = false; errlEid = errl->eid(); // commit the error but keep going errlCommit(errl, HWAS_COMP_ID); // errl is now NULL } else { // save info so that we can // process the PR keyword after this loop HWAS_INF("pTarget %.8X - pushing to procPRlist; FRU_ID %d", pTarget->getAttr<ATTR_HUID>(), pTarget->getAttr<ATTR_FRU_ID>()); l_procEntry.target = pTarget; l_procEntry.group = pTarget->getAttr<ATTR_FRU_ID>(); l_procEntry.procs = (prData[7] & VPD_VINI_PR_B7_MASK) + 1; l_procEntry.maxEXs = l_procEntry.procs * (prData[2] & VPD_VINI_PR_B2_MASK) >> VPD_VINI_PR_B2_SHIFT; l_procPRList.push_back(l_procEntry); if (l_procEntry.maxEXs == 0) { // this is PROBABLY bad PR, so YELL... HWAS_ERR("pTarget %.8X - PR VPD says 0 CORES", pTarget->getAttr<ATTR_HUID>()); } } } } // TYPE_PROC } // CLASS_CHIP HWAS_DBG("pTarget %.8X - detected present, %sfunctional", pTarget->getAttr<ATTR_HUID>(), chipFunctional ? "" : "NOT "); // now need to mark all of this target's // physical descendants as present and functional as appropriate TargetHandleList pDescList; targetService().getAssociated( pDescList, pTarget, TargetService::CHILD, TargetService::ALL); for (TargetHandleList::const_iterator pDesc_it = pDescList.begin(); pDesc_it != pDescList.end(); ++pDesc_it) { TargetHandle_t pDesc = *pDesc_it; // by default, the descendant's functionality is 'inherited' bool descFunctional = chipFunctional; if (chipFunctional) { // if the chip is functional, the look through the // partialGood vector to see if its chiplets // are functional if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_XBUS) && (pgData[VPD_CP00_PG_XBUS_INDEX] != VPD_CP00_PG_XBUS_GOOD)) { HWAS_INF("pDesc %.8X - XBUS pgPdata[%d]: expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), VPD_CP00_PG_XBUS_INDEX, VPD_CP00_PG_XBUS_GOOD); descFunctional = false; } else if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_ABUS) && (pgData[VPD_CP00_PG_ABUS_INDEX] != VPD_CP00_PG_ABUS_GOOD)) { HWAS_INF("pDesc %.8X - ABUS pgPdata[%d]: expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), VPD_CP00_PG_ABUS_INDEX, VPD_CP00_PG_ABUS_GOOD); descFunctional = false; } else if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_PCI) && (pgData[VPD_CP00_PG_PCIE_INDEX] != VPD_CP00_PG_PCIE_GOOD)) { HWAS_INF("pDesc %.8X - PCIe pgPdata[%d]: expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), VPD_CP00_PG_PCIE_INDEX, VPD_CP00_PG_PCIE_GOOD); descFunctional = false; } else if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_EX) || (pDesc->getAttr<ATTR_TYPE>() == TYPE_CORE) ) { ATTR_CHIP_UNIT_type indexEX = pDesc->getAttr<ATTR_CHIP_UNIT>(); if (pgData[VPD_CP00_PG_EX0_INDEX + indexEX] != VPD_CP00_PG_EX0_GOOD) { HWAS_INF("pDesc %.8X - CORE/EX%d pgPdata[%d]: expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), indexEX, VPD_CP00_PG_EX0_INDEX + indexEX, VPD_CP00_PG_EX0_GOOD); descFunctional = false; } } else if (pDesc->getAttr<ATTR_TYPE>() == TYPE_MCS) { ATTR_CHIP_UNIT_type indexMCS = pDesc->getAttr<ATTR_CHIP_UNIT>(); // check: MCS 0..3 in MCL, MCS 4..7 in MCR if (((indexMCS >=0) && (indexMCS <=3)) && ((pgData[VPD_CP00_PG_POWERBUS_INDEX] & VPD_CP00_PG_POWERBUS_MCL) == 0)) { HWAS_INF("pDesc %.8X - MCS%d pgPdata[%d]: MCL expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), indexMCS, VPD_CP00_PG_POWERBUS_INDEX, VPD_CP00_PG_POWERBUS_MCL); descFunctional = false; } else if (((indexMCS >=4) && (indexMCS <=7)) && ((pgData[VPD_CP00_PG_POWERBUS_INDEX] & VPD_CP00_PG_POWERBUS_MCR) == 0)) { HWAS_INF("pDesc %.8X - MCS%d pgPdata[%d]: MCR expected 0x%04X - bad", pDesc->getAttr<ATTR_HUID>(), indexMCS, VPD_CP00_PG_POWERBUS_INDEX, VPD_CP00_PG_POWERBUS_MCR); descFunctional = false; } } } // chipFunctional // for sub-parts, if it's not functional, it's not present. enableHwasState(pDesc, descFunctional, descFunctional, errlEid); HWAS_DBG("pDesc %.8X - marked %spresent, %sfunctional", pDesc->getAttr<ATTR_HUID>(), descFunctional ? "" : "NOT ", descFunctional ? "" : "NOT "); } // set HWAS state to show CHIP is present, functional per above enableHwasState(pTarget, chipPresent, chipFunctional, errlEid); } // for pTarget_it
errlHndl_t runStep(const TargetHandleList & i_targetList) { MDIA_FAST("memory diagnostics entry with %d target(s)", i_targetList.size()); // memory diagnostics ipl step entry point errlHndl_t err = 0; Globals globals; TargetHandle_t top = 0; targetService().getTopLevelTarget(top); if(top) { globals.mfgPolicy = top->getAttr<ATTR_MNFG_FLAGS>(); uint8_t maxMemPatterns = top->getAttr<ATTR_RUN_MAX_MEM_PATTERNS>(); // This registry / attr is the same as the // exhaustive mnfg one if(maxMemPatterns) { globals.mfgPolicy |= MNFG_FLAG_ENABLE_EXHAUSTIVE_PATTERN_TEST; } globals.simicsRunning = Util::isSimicsRunning(); globals.disableScrubs = top->getAttr<ATTR_DISABLE_SCRUB_AFTER_PATTERN_TEST>(); } // get the workflow for each target mba passed in. // associate each workflow with the target handle. WorkFlowAssocMap list; TargetHandleList::const_iterator tit; DiagMode mode; for(tit = i_targetList.begin(); tit != i_targetList.end(); ++tit) { err = getMbaDiagnosticMode(globals, *tit, mode); if(err) { break; } err = getMbaWorkFlow(mode, list[*tit], globals); if(err) { break; } } if(!err) { // set global data Singleton<StateMachine>::instance().setGlobals(globals); // TODO...run the workflow through the state machine err = Singleton<StateMachine>::instance().run(list); } // ensure threads and pools are shutdown when finished doStepCleanup(globals); return err; }
errlHndl_t ErrDataService::GenerateSrcPfa( ATTENTION_TYPE i_attnType, ServiceDataCollector & io_sdc, bool & o_initiateHwudump, TargetHandle_t & o_dumpTrgt, errlHndl_t & o_dumpErrl, uint32_t & o_dumpErrlActions) { #define PRDF_FUNC "[ErrDataService::GenerateSrcPfa] " o_initiateHwudump = false; o_dumpTrgt = NULL; o_dumpErrl = NULL; o_dumpErrlActions = 0; // First, check if an error log should be committed. Note that there should // always be an error log if there was a system or unit checkstop. if ( io_sdc.queryDontCommitErrl() && MACHINE_CHECK != i_attnType && !io_sdc.IsUnitCS() ) { // User did not want this error log committed. No need to continue. So // delete it and exit. delete iv_errl; iv_errl = NULL; return NULL; } #ifdef __HOSTBOOT_MODULE using namespace ERRORLOG; using namespace HWAS; #else uint8_t sdcSaveFlags = SDC_NO_SAVE_FLAGS; size_t sz_uint8 = sizeof(uint8_t); #endif epubProcedureID thisProcedureID; bool ForceTerminate = false; bool iplDiagMode = false; ++iv_serviceActionCounter; uint16_t PRD_Reason_Code = 0; //************************************************************** // Callout loop to set up Reason code and SRC word 9 //************************************************************** // Must go thru callout list to look for RIOPORT procedure callouts, // since they require the port info to be in SRC Word 9 bool HW = false; bool SW = false; bool SW_High = false; bool SecondLevel = false; uint32_t SrcWord7 = 0; uint32_t SrcWord9 = 0; // Should not gard hardware if there is a hardware callout at LOW priority // and a symbolic FRU indicating a possibility of a software error at MED or // HIGH priority. bool sappSwNoGardReq = false, sappHwNoGardReq = false; const SDC_MRU_LIST & mruList = io_sdc.getMruList(); int32_t calloutsPlusDimms = mruList.size(); for ( SDC_MRU_LIST::const_iterator it = mruList.begin(); it < mruList.end(); ++it ) { PRDcallout thiscallout = it->callout; if ( PRDcalloutData::TYPE_SYMFRU == thiscallout.getType() ) { if ( (SP_CODE == thiscallout.flatten()) || (SYS_SW_CODE == thiscallout.flatten()) ) { SW = true; if ( MRU_LOW != it->priority ) { sappSwNoGardReq = true; } if ( MRU_MED == it->priority ) { SW_High = true; } } else if ( LEVEL2_SUPPORT == thiscallout.flatten()) { SecondLevel = true; if ( MRU_LOW != it->priority ) { sappSwNoGardReq = true; } } } else if ( PRDcalloutData::TYPE_MEMMRU == thiscallout.getType() ) { MemoryMru memMru (thiscallout.flatten()); SrcWord9 = memMru.toUint32(); // Get MemMru value TargetHandleList partList = memMru.getCalloutList(); uint32_t partCount = partList.size(); calloutsPlusDimms = calloutsPlusDimms + partCount -1; HW = true; //hardware callout if ( MRU_LOW == it->priority ) { sappHwNoGardReq = true; } } else // PRDcalloutData::TYPE_TARGET { HW = true; // Hardware callout // Determines if all the hardware callouts have low priority. if ( MRU_LOW == it->priority ) { sappHwNoGardReq = true; } } } //////////////////////////////////////////////////////////////// //Set the PRD Reason Code based on the flags set in the above callout loop. //////////////////////////////////////////////////////////////// if (HW == true && SW == true) { if (SW_High == true) PRD_Reason_Code = PRDF_DETECTED_FAIL_SOFTWARE_PROBABLE; else PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE; } else if (HW == true && SW == false && SecondLevel == true) PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE; else if (HW == true && SW == false && SecondLevel == false) PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE; else if (HW == false && SW == true) PRD_Reason_Code = PRDF_DETECTED_FAIL_SOFTWARE; else { // If we get here both HW and SW flags were false. Callout may be // Second Level Support only, or a procedure not checked in the SW // flag code. PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE; } SrcWord7 = io_sdc.getPrimaryAttnType() << 8; SrcWord7 |= io_sdc.getSecondaryAttnType(); //-------------------------------------------------------------------------- // Check for IPL Diag Mode //-------------------------------------------------------------------------- #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) iplDiagMode = PlatServices::isInMdiaMode(); #endif //************************************************************** // Update Error Log with SRC //************************************************************** ErrorSignature * esig = io_sdc.GetErrorSignature(); updateSrc( esig->getChipId(), SrcWord7, esig->getSigId(), SrcWord9, PRD_Reason_Code); //************************************************************** // Add SDC Capture data to Error Log User Data here only if // there are 4 or more callouts, // (including Dimm callouts in the MemoryMru). //************************************************************** bool capDataAdded = false; if (calloutsPlusDimms > 3) { AddCapData( io_sdc.GetCaptureData(), iv_errl ); AddCapData( io_sdc.getTraceArrayData(), iv_errl ); capDataAdded = true; } //-------------------------------------------------------------------------- // Set the error log severity and get the error log action flags. //-------------------------------------------------------------------------- // Let's assume the default is the action for a system checkstop. #ifdef __HOSTBOOT_MODULE errlSeverity_t errlSev = ERRL_SEV_UNRECOVERABLE; #else errlSeverity errlSev = ERRL_SEV_UNRECOVERABLE; #endif uint32_t errlAct = ERRL_ACTION_SA | // Service action required. ERRL_ACTION_REPORT | // Report to HMC and hypervisor. ERRL_ACTION_CALL_HOME; // Call home. if ( MACHINE_CHECK != i_attnType ) // Anything other that a system checkstop { if ( io_sdc.queryServiceCall() ) // still a serviceable event { errlSev = ERRL_SEV_PREDICTIVE; } else // not a serviceable event { errlSev = io_sdc.queryLogging() ? ERRL_SEV_RECOVERED // should still be logged : ERRL_SEV_INFORMATIONAL; // can be ignored errlAct = ERRL_ACTION_HIDDEN; } } // This needs to be done after setting the SRCs otherwise it will be // overridden. iv_errl->setSev( errlSev ); // Add procedure callout for SUE attentions. The intent is to make sure the // customer looks for other service actions before replacing parts for this // attention. if ( io_sdc.IsSUE() ) { PRDF_HW_ADD_PROC_CALLOUT( SUE_PREV_ERR, MRU_HIGH, iv_errl, errlSev ); } //-------------------------------------------------------------------------- // Get the global gard policy. //-------------------------------------------------------------------------- HWAS::GARD_ErrorType gardPolicy = HWAS::GARD_NULL; // Gard only if the error is a serviceable event. if ( io_sdc.queryServiceCall() ) { // We will not Resource Recover on a checkstop attention. gardPolicy = ( MACHINE_CHECK == i_attnType ) ? HWAS::GARD_Fatal : HWAS::GARD_Predictive; } if ( io_sdc.IsSUE() && ( MACHINE_CHECK == i_attnType ) ) { // If we are logging an error for an SUE consumed, we should not // perform any GARD here. Appropriate resources should have already // been GARDed for the original UE. gardPolicy = HWAS::GARD_NULL; } // Apply special policies for OPAL. if ( isHyprConfigOpal() && // OPAL is used !isMfgAvpEnabled() && !isMfgHdatAvpEnabled() ) // No AVPs running { // OPAL has requested that we disable garding for predictive errors // found at runtime. if ( HWAS::GARD_Predictive == gardPolicy ) { #if !defined(__HOSTBOOT_MODULE) // FSP only if ( isHyprRunning() ) gardPolicy = HWAS::GARD_NULL; #elif defined(__HOSTBOOT_RUNTIME) // HBRT only gardPolicy = HWAS::GARD_NULL; #endif } // OPAL has requested that we diable garding for fatal errors (system // checkstops) that could have been caused by a software generated // attention at runtime. This will be determined if there is a software // callout with higher priority than a hardware callout. else if ( HWAS::GARD_Fatal == gardPolicy && sappSwNoGardReq && sappHwNoGardReq ) // Gard requirements met { #if !defined(__HOSTBOOT_MODULE) // FSP only if ( isHyprRunning() ) gardPolicy = HWAS::GARD_NULL; #elif !defined(__HOSTBOOT_RUNTIME) // Hostboot only #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS // Checkstop analysis is only done at the beginning of the IPL, // regardless if the checkstop actually occurred during the IPL // or at runtime. We will need to check the IPL state in FIR // data to determine when the checkstop occurred. // Get access to IPL state info from the FIR data in the PNOR. if ( !(PnorFirDataReader::getPnorFirDataReader().isIplState()) ) gardPolicy = HWAS::GARD_NULL; #endif #endif } } //-------------------------------------------------------------------------- // Get the global deconfig policy (must be done after setting gard policy). //-------------------------------------------------------------------------- HWAS::DeconfigEnum deconfigPolicy = HWAS::NO_DECONFIG; bool deferDeconfig = false; if ( HWAS::GARD_NULL != gardPolicy ) { #if !defined(__HOSTBOOT_MODULE) // FSP only // Change the deconfig state based the gard type. This is only required // to control what the FSP does during the reconfig loop. deconfigPolicy = HWSV::SvrError::isInHwReconfLoop() ? HWAS::DECONFIG : HWAS::NO_DECONFIG; #elif !defined(__HOSTBOOT_RUNTIME) // Hostboot only // Deferred Deconfig should be used throughout all of Hostboot (both // checkForIplAttns() and MDIA). deconfigPolicy = HWAS::DECONFIG; deferDeconfig = true; #endif } //-------------------------------------------------------------------------- // Get the HCDB diagnostics policy. //-------------------------------------------------------------------------- HWSV::hwsvDiagUpdate l_diagUpdate = HWSV::HWSV_DIAG_NEEDED; if ( ERRL_ACTION_HIDDEN == errlAct ) { // Diagnostics is not needed in the next IPL cycle for non-visible logs. l_diagUpdate = HWSV::HWSV_DIAG_NOT_NEEDED; } //-------------------------------------------------------------------------- // Initialize the PFA data //-------------------------------------------------------------------------- PfaData pfaData; initPfaData( io_sdc, i_attnType, deferDeconfig, errlAct, errlSev, gardPolicy, pfaData, o_dumpTrgt ); //-------------------------------------------------------------------------- // Add each mru/callout to the error log. //-------------------------------------------------------------------------- for ( SDC_MRU_LIST::const_iterator it = mruList.begin(); it < mruList.end(); ++it ) { PRDcallout thiscallout = it->callout; PRDpriority thispriority = it->priority; // Use the global gard/deconfig policies as default. HWAS::GARD_ErrorType thisGard = gardPolicy; HWAS::DeconfigEnum thisDeconfig = deconfigPolicy; // Change the gard/deconfig actions if this MRU should not be garded. if ( NO_GARD == it->gardState ) { thisGard = HWAS::GARD_NULL; thisDeconfig = HWAS::NO_DECONFIG; } // Add the callout to the PFA data addCalloutToPfaData( pfaData, thiscallout, thispriority, thisGard ); // Add callout based on callout type. if( PRDcalloutData::TYPE_TARGET == thiscallout.getType() ) { PRDF_HW_ADD_CALLOUT(thiscallout.getTarget(), thispriority, thisDeconfig, iv_errl, thisGard, errlSev, l_diagUpdate); } else if(PRDcalloutData::TYPE_PROCCLK == thiscallout.getType() || PRDcalloutData::TYPE_PCICLK == thiscallout.getType()) { PRDF_ADD_CLOCK_CALLOUT(iv_errl, thiscallout.getTarget(), thiscallout.getType(), thispriority, thisDeconfig, thisGard); } else if ( PRDcalloutData::TYPE_MEMMRU == thiscallout.getType() ) { MemoryMru memMru (thiscallout.flatten()); TargetHandleList partList = memMru.getCalloutList(); for ( TargetHandleList::iterator it = partList.begin(); it != partList.end(); it++ ) { PRDF_HW_ADD_CALLOUT( *it, thispriority, thisDeconfig, iv_errl, thisGard, errlSev, l_diagUpdate ); } } else if ( PRDcalloutData::TYPE_SYMFRU == thiscallout.getType() ) { thisProcedureID = epubProcedureID(thiscallout.flatten()); PRDF_DTRAC( PRDF_FUNC "thisProcedureID: %x, thispriority: %x, " "errlSev: %x", thisProcedureID, thispriority,errlSev ); PRDF_HW_ADD_PROC_CALLOUT(thisProcedureID, thispriority, iv_errl, errlSev); // Use the flags set earlier to determine if the callout is just // Software (SP code or Phyp Code). Add a Second Level Support // procedure callout Low, for this case. if (HW == false && SW == true && SecondLevel == false) { PRDF_DTRAC( PRDF_FUNC "thisProcedureID= %x, thispriority=%x, " "errlSev=%x", LEVEL2_SUPPORT, MRU_LOW, errlSev ); PRDF_HW_ADD_PROC_CALLOUT( LEVEL2_SUPPORT, MRU_LOW, iv_errl, errlSev ); SecondLevel = true; } } } // Send the dynamic memory Dealloc message for DIMMS for Predictive // callouts. // We can not check for ERRL severity here as there are some cases // e.g. DD02 where we create a Predictive error log but callouts // are not predictive. if ( HWAS::GARD_Predictive == gardPolicy ) { deallocateDimms( mruList ); } //************************************************************** // Check for Terminating the system for non mnfg conditions. //************************************************************** ForceTerminate = checkForceTerm( io_sdc, o_dumpTrgt, pfaData ); //************************************************************* // Check for Manufacturing Mode terminate here and then do // the needed overrides on ForceTerminate flag. //************************************************************* if ( PlatServices::mnfgTerminate() && !ForceTerminate ) { ForceTerminate = true; if ( !((errlSev == ERRL_SEV_RECOVERED) || (errlSev == ERRL_SEV_INFORMATIONAL)) && iplDiagMode && !HW ) { //Terminate in Manufacturing Mode, in IPL mode, for visible log, with no HW callouts. PRDF_SRC_WRITE_TERM_STATE_ON(iv_errl, SRCI_TERM_STATE_MNFG); } // Do not terminate if recoverable or informational. // Do not terminate if deferred deconfig. else if ( deferDeconfig || (errlSev == ERRL_SEV_RECOVERED ) || (errlSev == ERRL_SEV_INFORMATIONAL) ) { ForceTerminate = false; errlAct |= ERRL_ACTION_DONT_TERMINATE; } else { PRDF_SRC_WRITE_TERM_STATE_ON(iv_errl, SRCI_TERM_STATE_MNFG); } pfaData.errlActions = errlAct; } // Needed to move the errl add user data sections here because of some updates // of the data required in the Aysnc section for the SMA dual reporting fix. //************************************************************** // Add the PFA data to Error Log User Data //************************************************************** UtilMem l_membuf; l_membuf << pfaData; PRDF_ADD_FFDC( iv_errl, (const char*)l_membuf.base(), l_membuf.size(), ErrlVer1, ErrlSectPFA5_1 ); //************************************************************** // Add SDC Capture data to Error Log User Data //************************************************************** // Pulled some code out to incorporate into AddCapData // Check to make sure Capture Data wasn't added earlier. if (!capDataAdded) { AddCapData( io_sdc.GetCaptureData(), iv_errl ); AddCapData( io_sdc.getTraceArrayData(), iv_errl ); } //************************************************************************** // Add extended MemoryMru error log sections (if needed). //************************************************************************** for ( SDC_MRU_LIST::const_iterator it = mruList.begin(); it < mruList.end(); ++it ) { // Operate only on MemoryMru callouts. if ( PRDcalloutData::TYPE_MEMMRU != it->callout.getType() ) continue; /* TODO RTC 136125 // Only add single DIMM callouts. Otherwise, the parsed data is // redundant. MemoryMru memMru ( it->callout.flatten() ); if ( !memMru.getSymbol().isValid() ) continue; // Add the MemoryMru to the capture data. CenMbaCaptureData::addExtMemMruData( memMru, iv_errl ); */ } //************************************************************************** // Additional FFDC //************************************************************************** // For OP checkstop analysis, add a string indicating a system checkstop // occurred and when. This will be printed out in the console traces along // with the error log. #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) // IPL only #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS if ( MACHINE_CHECK == i_attnType ) { const char * const str = PnorFirDataReader::getPnorFirDataReader().isIplState() ? "System checkstop occurred during IPL on previous boot" : "System checkstop occurred during runtime on previous boot"; ErrlUserDetailsString(str).addToLog(iv_errl); } #endif #endif // Collect PRD traces. // NOTE: Each line of a trace is on average 36 bytes so 768 bytes should get // us around 21 lines of trace output. PRDF_COLLECT_TRACE(iv_errl, 768); //************************************************************** // Commit the error log. // This will also perform Gard and Deconfig actions. // Do the Unit Dumps if needed. //************************************************************** // Add the MNFG trace information. MnfgTrace( io_sdc.GetErrorSignature(), pfaData ); // If this is not a terminating condition, commit the error log. If the // error log is not committed, the error log will be passed back to // PRDF::main() and eventually ATTN. if ( MACHINE_CHECK != pfaData.priAttnType && !ForceTerminate && !pfaData.TERMINATE ) { // Handle any unit checkstop conditions, if needed (i.e. runtime // deconfiguration, dump/FFDC collection, etc. if ( io_sdc.IsUnitCS() && !io_sdc.IsUsingSavedSdc() ) { handleUnitCS( io_sdc, o_dumpTrgt, o_initiateHwudump ); } if ( true == o_initiateHwudump ) { // the dump log will be deleted later in PRDF::main // after the hwudump is initiated there. o_dumpErrl = iv_errl; iv_errl = NULL; o_dumpErrlActions = errlAct; PRDF_TRAC( PRDF_FUNC "for target: 0x%08x, i_errl: 0x%08x, " "i_errlActions: 0x%08x", getHuid(o_dumpTrgt), ERRL_GETRC_SAFE(o_dumpErrl), o_dumpErrlActions ); } else { // Commit the error log. commitErrLog( iv_errl, pfaData ); } } #ifndef __HOSTBOOT_MODULE errlHndl_t reg_errl = UtilReg::read ("prdf/RasServices", &sdcSaveFlags, sz_uint8); if (reg_errl) { PRDF_ERR( PRDF_FUNC "Failure in SDC Sync flag Registry read" ); PRDF_COMMIT_ERRL(reg_errl, ERRL_ACTION_REPORT); } else { //Turn off indicator that there is saved Sdc Analysis info sdcSaveFlags &= ( ~SDC_ANALYSIS_SAVE_FLAG ); reg_errl = UtilReg::write ("prdf/RasServices", &sdcSaveFlags, sz_uint8); if (reg_errl) { PRDF_ERR( PRDF_FUNC "Failure in SDC Sync flag Registry write" ); PRDF_COMMIT_ERRL(reg_errl, ERRL_ACTION_REPORT); } } #endif PRDF_INF( PRDF_FUNC "PRD called to analyze an error: 0x%08x 0x%08x", esig->getChipId(), esig->getSigId() ); // Reset iv_errl to NULL. This is done to catch logical bug in our code. // It enables us to assert in createInitialErrl function if iv_errl is // not NULL which should catch any logical bug in initial stages of testing. errlHndl_t o_errl = iv_errl; iv_errl = NULL; return o_errl; #undef PRDF_FUNC }
/** * @brief Stops OCCs on all Processors in the node */ errlHndl_t stopAllOCCs() { TRACFCOMP( g_fapiTd,ENTER_MRK"stopAllOCCs" ); errlHndl_t l_errl = NULL; bool winkle_loaded = false; do { #ifndef __HOSTBOOT_RUNTIME //OCC requires the build_winkle_images library if ( !VFS::module_is_loaded( "libbuild_winkle_images.so" ) ) { l_errl = VFS::module_load( "libbuild_winkle_images.so" ); if ( l_errl ) { // load module returned with errl set TRACFCOMP( g_fapiTd,ERR_MRK"loadnStartAllOccs: Could not load build_winkle module" ); // break from do loop if error occured break; } winkle_loaded = true; } #endif TargetHandleList procChips; getAllChips(procChips, TYPE_PROC, true); if(procChips.size() == 0) { TRACFCOMP( g_fapiTd,INFO_MRK"loadnStartAllOccs: No processors found" ); //We'll never get this far in the IPL without any processors, // so just exit. break; } TRACFCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: %d procs found", procChips.size()); //The OCC Procedures require processors within a DCM be //setup together. If DCM installed is set, we work under //the assumption that each nodeID is a DCM. So sort the //list by NodeID then call OCC Procedures on NodeID pairs. std::sort(procChips.begin(), procChips.end(), orderByNodeAndPosition); //The OCC master for the node must be reset last. For all //OP systems there is only a single OCC that can be the //master so it is safe to look at the MASTER_CAPABLE flag. Target* masterProc0 = NULL; Target* masterProc1 = NULL; TargetHandleList::iterator itr1 = procChips.begin(); if(0 == (*itr1)->getAttr<ATTR_PROC_DCM_INSTALLED>()) { TRACUCOMP( g_fapiTd, INFO_MRK"stopAllOCCs: non-dcm path entered"); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { TargetHandleList pOccs; getChildChiplets(pOccs, *itr, TYPE_OCC); if (pOccs.size() > 0) { if( pOccs[0]->getAttr<ATTR_OCC_MASTER_CAPABLE>() ) { masterProc0 = *itr; continue; } } l_errl = HBOCC::stopOCC( *itr, NULL ); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed"); errlCommit (l_errl, HWPF_COMP_ID); // just commit and try the next chip } } if (l_errl) { break; } } else { TRACFCOMP( g_fapiTd, INFO_MRK"stopAllOCCs: Following DCM Path"); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { Target* targ0 = *itr; Target* targ1 = NULL; TRACFCOMP( g_fapiImpTd, INFO_MRK"stopAllOCCs: Cur target nodeID=%d", targ0->getAttr<ATTR_FABRIC_NODE_ID>()); //if the next target in the list is in the same node // they are on the same DCM, so bump itr forward // and update targ0 pointer if((itr+1) != procChips.end()) { TRACFCOMP( g_fapiImpTd, INFO_MRK"stopAllOCCs: n+1 target nodeID=%d", ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>())); if((targ0->getAttr<ATTR_FABRIC_NODE_ID>()) == ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>())) { //need to flip the numbers because we were reversed targ1 = targ0; itr++; targ0 = *itr; } } TargetHandleList pOccs; getChildChiplets(pOccs, targ0, TYPE_OCC); if (pOccs.size() > 0) { if( pOccs[0]->getAttr<ATTR_OCC_MASTER_CAPABLE>() ) { masterProc0 = targ0; masterProc1 = targ1; continue; } } l_errl = HBOCC::stopOCC( targ0, targ1 ); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed"); errlCommit (l_errl, HWPF_COMP_ID); // just commit and try the next module } } if (l_errl) { break; } } //now do the master OCC if( masterProc0 ) { l_errl = HBOCC::stopOCC( masterProc0, masterProc1 ); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed on master"); break; } } } while(0); //make sure we always unload the module if we loaded it if (winkle_loaded) { #ifndef __HOSTBOOT_RUNTIME errlHndl_t l_tmpErrl = VFS::module_unload( "libbuild_winkle_images.so" ); if ( l_tmpErrl ) { TRACFCOMP( g_fapiTd,ERR_MRK"stopAllOCCs: Error unloading build_winkle module" ); if(l_errl) { errlCommit( l_tmpErrl, HWPF_COMP_ID ); } else { l_errl = l_tmpErrl; } } #endif } TRACFCOMP( g_fapiTd,EXIT_MRK"stopAllOCCs" ); return l_errl; }
/** * @brief Starts OCCs on all Processors in the node * This is intended to be used for AVP testing. * * @param[out] o_failedOccTarget: Pointer to the target failing * loadnStartAllOccs * @param[in] i_useSRAM: bool - use SRAM for OCC image, ie during IPL * true if during IPL, false if at end of IPL (default) * @return errlHndl_t Error log if OCC load failed */ errlHndl_t loadnStartAllOccs(TARGETING::Target *& o_failedOccTarget, bool i_useSRAM) { errlHndl_t l_errl = NULL; void* homerVirtAddrBase = NULL; uint64_t homerPhysAddrBase = VMM_HOMER_REGION_START_ADDR; bool winkle_loaded = false; TRACUCOMP( g_fapiTd, ENTER_MRK"loadnStartAllOccs(%d)", i_useSRAM); do { #ifndef __HOSTBOOT_RUNTIME //OCC requires the build_winkle_images library if ( !VFS::module_is_loaded( "libbuild_winkle_images.so" ) ) { l_errl = VFS::module_load( "libbuild_winkle_images.so" ); if ( l_errl ) { // load module returned with errl set TRACFCOMP( g_fapiTd,ERR_MRK"loadnStartAllOccs: Could not load build_winkle module" ); // break from do loop if error occured break; } winkle_loaded = true; } assert(VMM_HOMER_REGION_SIZE <= THIRTYTWO_GB, "loadnStartAllOccs: Unsupported HOMER Region size"); if (!i_useSRAM) { //If running Sapphire need to place this at the top of memory if(TARGETING::is_sapphire_load()) { homerPhysAddrBase = TARGETING::get_top_mem_addr(); assert (homerPhysAddrBase != 0, "loadnStartAllOccs: Top of memory was 0!"); homerPhysAddrBase -= VMM_ALL_HOMER_OCC_MEMORY_SIZE; } TRACFCOMP( g_fapiTd, "HOMER is at %.16X", homerPhysAddrBase ); //Map entire homer region into virtual memory homerVirtAddrBase = mm_block_map(reinterpret_cast<void*>(homerPhysAddrBase), VMM_HOMER_REGION_SIZE); TRACFCOMP( g_fapiTd, "HOMER virtaddrbase %.16X", homerVirtAddrBase ); } else { // malloc space big enough for all of OCC homerVirtAddrBase = (void *)malloc(1 * MEGABYTE); homerPhysAddrBase = mm_virt_to_phys(homerVirtAddrBase); } #endif if (i_useSRAM) { // OCC is going into L3 and SRAM so only need 1 prime and load // into the Master Proc TargetService & tS = targetService(); Target * sysTarget = NULL; tS.getTopLevelTarget( sysTarget ); assert( sysTarget != NULL ); Target* masterproc = NULL; tS.masterProcChipTargetHandle( masterproc ); /******* SETUP AND LOAD **************/ l_errl = primeAndLoadOcc (masterproc, homerVirtAddrBase, homerPhysAddrBase, i_useSRAM); if(l_errl) { o_failedOccTarget = masterproc; TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs:primeAndLoadOcc failed"); free(homerVirtAddrBase); break; } /********* START OCC *************/ l_errl = HBOCC::startOCC (masterproc, NULL, o_failedOccTarget); // it either started or errored; either way, free the memory free(homerVirtAddrBase); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"loadnStartAllOccs: startOCC failed"); break; } } else { TargetHandleList procChips; getAllChips(procChips, TYPE_PROC, true); if(procChips.size() == 0) { TRACFCOMP( g_fapiTd,INFO_MRK"loadnStartAllOccs: No processors found" ); //We'll never get this far in the IPL without any processors, // so just exit. break; } TRACUCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: %d procs found", procChips.size()); TargetHandleList::iterator itr1 = procChips.begin(); //The OCC Procedures require processors within a DCM be //setup together. So, first checking if any proc has //DCM installed attribute set. If not, we can iterate //over the list in any order. //If DCM installed is set, we work under the assumption //that each nodeID is a DCM. So sort the list by NodeID //then call OCC Procedures on NodeID pairs. if(0 == (*itr1)->getAttr<ATTR_PROC_DCM_INSTALLED>()) { TRACUCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: non-dcm path entered"); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { /******* SETUP AND LOAD **************/ l_errl = primeAndLoadOcc (*itr, homerVirtAddrBase, homerPhysAddrBase, i_useSRAM); if(l_errl) { o_failedOccTarget = *itr; TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs:primeAndLoadOcc failed"); break; } /********* START OCC *************/ l_errl = HBOCC::startOCC (*itr, NULL, o_failedOccTarget); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK"loadnStartAllOccs: startOCC failed"); break; } } if (l_errl) { break; } } else { TRACUCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: Following DCM Path"); std::sort(procChips.begin(), procChips.end(), orderByNodeAndPosition); TRACUCOMP( g_fapiTd, INFO_MRK"loadnStartAllOccs: procChips list sorted"); for (TargetHandleList::iterator itr = procChips.begin(); itr != procChips.end(); ++itr) { TRACUCOMP( g_fapiImpTd, INFO_MRK"loadnStartAllOccs: Insepcting first target" ); Target* targ0 = *itr; Target* targ1 = NULL; TRACUCOMP( g_fapiImpTd, INFO_MRK "loadnStartAllOccs: Cur target nodeID=%d", targ0->getAttr<ATTR_FABRIC_NODE_ID>()); //if the next target in the list is in the same node // they are on the same DCM, so bump itr forward // and update targ1 pointer if((itr+1) != procChips.end()) { TRACUCOMP( g_fapiImpTd, INFO_MRK "loadnStartAllOccs: n+1 target nodeID=%d", ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>()) ); if((targ0->getAttr<ATTR_FABRIC_NODE_ID>()) == ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>())) { itr++; targ1 = *itr; } } /********** Setup and load targ0 ***********/ l_errl = primeAndLoadOcc (targ0, homerVirtAddrBase, homerPhysAddrBase, i_useSRAM); if(l_errl) { o_failedOccTarget = targ0; TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs: " "primeAndLoadOcc failed on targ0"); break; } /*********** Setup and load targ1 **********/ l_errl = primeAndLoadOcc (targ1, homerVirtAddrBase, homerPhysAddrBase, i_useSRAM); if(l_errl) { o_failedOccTarget = targ1; TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs: " "primeAndLoadOcc failed on targ1"); break; } /*********** Start OCC *******************/ l_errl = HBOCC::startOCC (targ0, targ1, o_failedOccTarget); if (l_errl) { TRACFCOMP( g_fapiImpTd, ERR_MRK "loadnStartAllOccs: start failed"); break; } } if (l_errl) { break; } } } } while(0); errlHndl_t l_tmpErrl = NULL; //Unless HTMGT is in use, there are no further accesses to HOMER memory //required during the IPL #ifndef CONFIG_HTMGT if(homerVirtAddrBase) { int rc = 0; rc = mm_block_unmap(homerVirtAddrBase); if (rc != 0) { /*@ * @errortype * @moduleid fapi::MOD_OCC_LOAD_START_ALL_OCCS * @reasoncode fapi::RC_MM_UNMAP_ERR * @userdata1 Return Code * @userdata2 Unmap address * @devdesc mm_block_unmap() returns error * @custdesc A problem occurred during the IPL * of the system. */ l_tmpErrl = new ERRORLOG::ErrlEntry( ERRORLOG::ERRL_SEV_UNRECOVERABLE, fapi::MOD_OCC_LOAD_START_ALL_OCCS, fapi::RC_MM_UNMAP_ERR, rc, reinterpret_cast<uint64_t> (homerVirtAddrBase)); if(l_tmpErrl) { if(l_errl) { errlCommit( l_tmpErrl, HWPF_COMP_ID ); } else { l_errl = l_tmpErrl; } } } } #endif //make sure we always unload the module if (winkle_loaded) { l_tmpErrl = VFS::module_unload( "libbuild_winkle_images.so" ); if ( l_tmpErrl ) { TRACFCOMP ( g_fapiTd,ERR_MRK "loadnStartAllOccs: Error unloading build_winkle module" ); if(l_errl) { errlCommit( l_tmpErrl, HWPF_COMP_ID ); } else { l_errl = l_tmpErrl; } } } TRACUCOMP( g_fapiTd, EXIT_MRK"loadnStartAllOccs" ); return l_errl; }