Ejemplo n.º 1
0
TargetHandle_t TargetServiceImpl::getMcs(
        TargetHandle_t i_proc,
        uint64_t i_pos)
{
    PredicateCTM classTypeMatch(CLASS_UNIT, TYPE_MCS);
    PredicateIsFunctional functionalMatch;
    PredicatePostfixExpr pred;

    class ChipUnitMatch : public PredicateBase
    {
        uint8_t iv_pos;

        public:

        bool operator()(const Target * i_target) const
        {
            uint8_t pos;

            bool match = false;

            if(i_target->tryGetAttr<ATTR_CHIP_UNIT>(pos))
            {
                match = iv_pos == pos;
            }

            return match;
        }

        explicit ChipUnitMatch(uint8_t i_pos)
            : iv_pos(i_pos) {}

    } chipUnitMatch(i_pos);

    pred.push(&classTypeMatch).push(
            &functionalMatch).And();

    pred.push(&chipUnitMatch).And();

    TargetHandleList list;
    TargetHandle_t mcs = NULL;

    targetService().getAssociated(
            list,
            i_proc,
            TARGETING::TargetService::CHILD_BY_AFFINITY,
            TARGETING::TargetService::ALL,
            &pred);

    if(list.size() == 1)
    {
        mcs = list[0];
    }

    return mcs;
}
Ejemplo n.º 2
0
/**
 * @brief  Optional plugin function called after analysis is complete but
 *         before PRD exits.
 * @param  i_cenChip A Centaur MBA chip.
 * @param  i_sc      The step code data struct.
 * @note   This is especially useful for any analysis that still needs to be
 *         done after the framework clears the FIR bits that were at attention.
 * @return SUCCESS.
 */
int32_t PllPostAnalysis( ExtensibleChip * i_cenChip,
                         STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[Membuf::PllPostAnalysis] "

    int32_t o_rc = SUCCESS;

    TargetHandle_t cenTrgt = i_cenChip->GetChipHandle();

    do
    {
        // need to clear associated bits in the MCIFIR bits.
        o_rc = MemUtils::mcifirCleanup( i_cenChip, i_sc );
        if( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC"mcifirCleanup() failed");
            break;
        }

        // Check to make sure we are at threshold and have something garded.
        if ( !i_sc.service_data->IsAtThreshold() ||
             (GardAction::NoGard == i_sc.service_data->QueryGard()) )
        {
            break; // nothing to do
        }

        TargetHandleList list = getConnected( cenTrgt, TYPE_MBA );
        if ( 0 == list.size() )
        {
            PRDF_ERR( PRDF_FUNC"getConnected(0x%08x, TYPE_MBA) failed",
                      getHuid(cenTrgt) );
            o_rc = FAIL; break;
        }

        // Send SKIP_MBA message for each MBA.
        for ( TargetHandleList::iterator mbaIt = list.begin();
              mbaIt != list.end(); ++mbaIt )
        {
            int32_t l_rc = mdiaSendEventMsg( *mbaIt, MDIA::SKIP_MBA );
            if ( SUCCESS != l_rc )
            {
                PRDF_ERR( PRDF_FUNC"mdiaSendEventMsg(0x%08x, SKIP_MBA) failed",
                          getHuid(*mbaIt) );
                o_rc |= FAIL;
                continue; // keep going
            }
        }

    } while(0);

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 3
0
TargetHandle_t TargetServiceImpl::getMembuf(
        TargetHandle_t i_mcs)
{
    TargetHandle_t membuf = NULL;

    TargetHandleList list;
    getChildAffinityTargets(list, i_mcs, CLASS_CHIP, TYPE_MEMBUF);

    if(list.size() == 1)
    {
        membuf = list[0];
    }

    return membuf;
}
Ejemplo n.º 4
0
TargetHandle_t TargetServiceImpl::getMcs(
        TargetHandle_t i_membuf)
{
    TargetHandle_t mcs = NULL;

    TargetHandleList list;
    PredicateCTM pred(CLASS_UNIT, TYPE_MCS);

    targetService().getAssociated(
            list,
            i_membuf,
            TARGETING::TargetService::PARENT_BY_AFFINITY,
            TARGETING::TargetService::IMMEDIATE,
            &pred);

    if(list.size() == 1)
    {
        mcs = list[0];
    }

    return mcs;
}
Ejemplo n.º 5
0
TargetHandle_t TargetServiceImpl::getProc(
        TargetHandle_t i_membuf)
{
    TargetHandle_t proc = NULL;

    TargetHandleList list;
    PredicateCTM pred(CLASS_CHIP, TYPE_PROC);

    targetService().getAssociated(
            list,
            i_membuf,
            TARGETING::TargetService::PARENT_BY_AFFINITY,
            TARGETING::TargetService::ALL,
            &pred);

    if(list.size() == 1)
    {
        proc = list[0];
    }

    return proc;
}
Ejemplo n.º 6
0
errlHndl_t startScrub()
{
    #define PRDF_FUNC "[PRDF::startScrub] "
    PRDF_ENTER( PRDF_FUNC );

    errlHndl_t o_errl = NULL;

    int32_t l_rc = SUCCESS;
    HUID nodeId = INVALID_HUID;

    // will unlock when going out of scope
    PRDF_SYSTEM_SCOPELOCK;

    do
    {
        // Since the last refresh is in istep10 host_prd_hwreconfig,
        // it may be good to call it again here at istep16 mss_scrub
        // to remove any non-functional MBAs from PRD system model.
        o_errl = noLock_refresh();
        // This shouldn't return any error but if it does, break out
        if(NULL != o_errl)
        {
            PRDF_ERR( PRDF_FUNC"noLock_refresh() failed" );
            break;
        }

        // This is run in Hostboot so there should only be one node.
        TargetHandleList list = getFunctionalTargetList( TYPE_NODE );
        if ( 1 != list.size() )
        {
            PRDF_ERR( PRDF_FUNC"getFunctionalTargetList(TYPE_NODE) failed" );
            l_rc = FAIL; break;
        }
        nodeId = getHuid(list[0]);

        PRDF_ENTER( PRDF_FUNC"HUID=0x%08x", nodeId );

        // Start scrubbing on all MBAs.
        MbaDomain * domain = (MbaDomain *)systemPtr->GetDomain(MBA_DOMAIN);
        if ( NULL == domain )
        {
            PRDF_ERR( PRDF_FUNC"MBA_DOMAIN not found. nodeId=0x%08x", nodeId );
            l_rc = FAIL; break;
        }
        l_rc = domain->startScrub();

        PRDF_EXIT( PRDF_FUNC"HUID=0x%08x", nodeId );

    } while (0);

    if (( SUCCESS != l_rc ) && (NULL == o_errl))
    {
        // Get user data
        uint64_t ud12 = PRDF_GET_UINT64_FROM_UINT32( nodeId, __LINE__ );
        uint64_t ud34 = PRDF_GET_UINT64_FROM_UINT32( 0,      0        );

        // Create error log
        o_errl = new ERRORLOG::ErrlEntry(
                            ERRORLOG::ERRL_SEV_UNRECOVERABLE, // severity
                            PRDF_START_SCRUB,                 // module ID
                            PRDF_DETECTED_FAIL_SOFTWARE,      // reason code
                            ud12, ud34 );                     // user data 1-4

        // Add 2nd level support
        o_errl->addProcedureCallout( EPUB_PRC_LVL_SUPP, SRCI_PRIORITY_HIGH );

        // Add traces
        o_errl->collectTrace( PRDF_COMP_NAME, 512 );
    }

    PRDF_EXIT( PRDF_FUNC );

    return o_errl;

    #undef PRDF_FUNC
}
Ejemplo n.º 7
0
errlHndl_t PegasusConfigurator::addDomainChips( TARGETING::TYPE  i_type,
                                          RuleChipDomain * io_domain,
                                          PllDomainList  * io_pllDomains,
                                          PllDomainList  * io_pllDomains2)
{
    using namespace TARGETING;
    errlHndl_t l_errl = NULL ;

    // Get references to factory objects.
    ScanFacility      & scanFac = ScanFacility::Access();
    ResolutionFactory & resFac  = ResolutionFactory::Access();

    // Get all targets of specified type and add to given domain.
    TargetHandleList list = PlatServices::getFunctionalTargetList( i_type );

    if ( 0 == list.size() )
    {
        PRDF_ERR( "[addDomainChips] getFunctionalTargetList "
                  "returned empty list for i_type=%d", i_type );
    }
    else
    {
        // Get rule filename based on type.
        const char * fileName = "";
        switch ( i_type )
        {
            case TYPE_PROC:   
                // Check which Proc chip type
                if (MODEL_NAPLES == getProcModel(list[0]))
                    fileName = NaplesProc;
                else
                    fileName = MuranoVeniceProc;
                break;
            case TYPE_EX:     fileName = Ex;     break;
            case TYPE_MCS:    fileName = Mcs;    break;
            case TYPE_MEMBUF: fileName = Membuf; break;
            case TYPE_MBA:    fileName = Mba;    break;

            default:
                // Print a trace statement, but do not fail the build.
                PRDF_ERR( "[addDomainChips] Unsupported target type: %d",
                          i_type );
        }

        for ( TargetHandleList::const_iterator itr = list.begin();
              itr != list.end(); ++itr )
        {
            if ( NULL == *itr ) continue;

            RuleChip * chip = new RuleChip( fileName, *itr,
                                            scanFac, resFac,l_errl );
            if( NULL != l_errl )
            {
                delete chip;
                break;
            }
            sysChipLst.push_back( chip );
            io_domain->AddChip(   chip );

            // PLL domains
            switch ( i_type )
            {
                case TYPE_PROC:
                    addChipsToPllDomain(CLOCK_DOMAIN_FAB,
                                        io_pllDomains,
                                        chip,
                                        *itr,
                                        scanFac,
                                        resFac);
                    addChipsToPllDomain(CLOCK_DOMAIN_IO,
                                        io_pllDomains2,
                                        chip,
                                        *itr,
                                        scanFac,
                                        resFac);
                    break;
                case TYPE_MEMBUF:
                    addChipsToPllDomain(CLOCK_DOMAIN_MEMBUF,
                                        io_pllDomains,
                                        chip,
                                        *itr,
                                        scanFac,
                                        resFac);
                    break;
                default:
                    break;
            }
        }

        // Flush rule table cache since objects are all built.
        Prdr::LoadChipCache::flushCache();

    }

    return  l_errl;
}
Ejemplo n.º 8
0
int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[CenMbaTdCtlr::handleUE] "

    using namespace CalloutUtil;

    int32_t o_rc = SUCCESS;

    iv_tdState = NO_OP; // Abort the TD procedure.

    setTdSignature( io_sc, PRDFSIG_MaintUE );
    io_sc.service_data->SetServiceCall();

    CenMbaDataBundle * mbadb = getMbaDataBundle( iv_mbaChip );

    do
    {
        // Clean up the maintenance command. This is needed just in case the UE
        // isolation procedure is modified to use maintenance commands.
        o_rc = cleanupPrevCmd();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC"cleanupPrevCmd() failed" );
            break;
        }

        // Look for all failing bits on this rank.
        CenDqBitmap bitmap;
        o_rc = mssIplUeIsolation( iv_mbaTrgt, iv_rank, bitmap );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC"mssIplUeIsolation() failed" );
            break;
        }

        // Add UE data to capture data.
        bitmap.getCaptureData( io_sc.service_data->GetCaptureData() );

        // Callout the failing DIMMs.
        TargetHandleList callouts;
        for ( int32_t ps = 0; ps < PORT_SLCT_PER_MBA; ps++ )
        {
            bool badDqs = false;
            o_rc = bitmap.badDqs( ps, badDqs );
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC"badDqs(%d) failed", ps );
                break;
            }

            if ( !badDqs ) continue; // nothing to do.

            TargetHandleList dimms = getConnectedDimms(iv_mbaTrgt, iv_rank, ps);
            if ( 0 == dimms.size() )
            {
                PRDF_ERR( PRDF_FUNC"getConnectedDimms(%d) failed", ps );
                o_rc = FAIL; break;
            }

            callouts.insert( callouts.end(), dimms.begin(), dimms.end() );

            if ( isMfgCeCheckingEnabled() )
            {
                // As we are doing callout for UE, we dont need to do callout
                // during CE for this rank on given port
                mbadb->getIplCeStats()->banAnalysis( iv_rank, ps );
            }
        }
        if ( SUCCESS != o_rc ) break;

        if ( 0 == callouts.size() )
        {
            // It is possible the scrub counters have rolled over to zero due to
            // a known DD1.0 hardware bug. In this case, the best we can do is
            // callout both DIMMs, because at minimum we know there was a UE, we
            // just don't know where.
            // NOTE: If this condition happens because of a DD2.0+ bug, the
            //       mssIplUeIsolation procedure will callout the Centaur.
            callouts = getConnectedDimms( iv_mbaTrgt, iv_rank );
            if ( 0 == callouts.size() )
            {
                PRDF_ERR( PRDF_FUNC"getConnectedDimms() failed" );
                o_rc = FAIL; break;
            }

            if ( isMfgCeCheckingEnabled() )
            {
                // As we are doing callout for UE, we dont need to do callout
                // during CE for this rank on both port
                mbadb->getIplCeStats()->banAnalysis( iv_rank);
            }
        }

        // Callout all DIMMs in the list.
        for ( TargetHandleList::iterator i = callouts.begin();
              i != callouts.end(); i++ )
        {
            io_sc.service_data->SetCallout( *i, MRU_HIGH );
        }

    } while(0);

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 9
0
errlHndl_t discoverTargets()
{
    HWAS_INF("discoverTargets entry");
    errlHndl_t errl = NULL;

    //  loop through all the targets and set HWAS_STATE to a known default
    for (TargetIterator target = targetService().begin();
            target != targetService().end();
            ++target)
    {
        HwasState hwasState             = target->getAttr<ATTR_HWAS_STATE>();
        hwasState.deconfiguredByEid     = 0;
        hwasState.poweredOn             = false;
        hwasState.present               = false;
        hwasState.functional            = false;
        hwasState.dumpfunctional        = false;
        target->setAttr<ATTR_HWAS_STATE>(hwasState);
    }

    // Assumptions and actions:
    // CLASS_SYS (exactly 1) - mark as present
    // CLASS_ENC, TYPE_PROC, TYPE_MEMBUF, TYPE_DIMM
    //     (ALL require hardware query) - call platPresenceDetect
    //  \->children: CLASS_* (NONE require hardware query) - mark as present
    do
    {
        // find CLASS_SYS (the top level target)
        Target* pSys;
        targetService().getTopLevelTarget(pSys);

        HWAS_ASSERT(pSys,
                "HWAS discoverTargets: no CLASS_SYS TopLevelTarget found");

        // mark this as present
        enableHwasState(pSys, true, true, 0);
        HWAS_DBG("pSys %.8X - marked present",
            pSys->getAttr<ATTR_HUID>());

        // find list of all we need to call platPresenceDetect against
        PredicateCTM predEnc(CLASS_ENC);
        PredicateCTM predChip(CLASS_CHIP);
        PredicateCTM predDimm(CLASS_LOGICAL_CARD, TYPE_DIMM);
        PredicatePostfixExpr checkExpr;
        checkExpr.push(&predChip).push(&predDimm).Or().push(&predEnc).Or();

        TargetHandleList pCheckPres;
        targetService().getAssociated( pCheckPres, pSys,
            TargetService::CHILD, TargetService::ALL, &checkExpr );

        // pass this list to the hwas platform-specific api where
        // pCheckPres will be modified to only have present targets
        HWAS_DBG("pCheckPres size: %d", pCheckPres.size());
        errl = platPresenceDetect(pCheckPres);
        HWAS_DBG("pCheckPres size: %d", pCheckPres.size());

        if (errl != NULL)
        {
            break; // break out of the do/while so that we can return
        }

        // for each, read their ID/EC level. if that works,
        //  mark them and their descendants as present
        //  read the partialGood vector to determine if any are not functional
        //  and read and store values from the PR keyword

        // list of procs and data that we'll need to look at the PR keyword
        procRestrict_t l_procEntry;
        std::vector <procRestrict_t> l_procPRList;

        // sort the list by ATTR_HUID to ensure that we
        //  start at the same place each time
        std::sort(pCheckPres.begin(), pCheckPres.end(),
                compareTargetHuid);

        for (TargetHandleList::const_iterator pTarget_it = pCheckPres.begin();
                pTarget_it != pCheckPres.end();
                ++pTarget_it
            )
        {
            TargetHandle_t pTarget = *pTarget_it;

            // if CLASS_ENC is still in this list, mark as present
            if (pTarget->getAttr<ATTR_CLASS>() == CLASS_ENC)
            {
                enableHwasState(pTarget, true, true, 0);
                HWAS_DBG("pTarget %.8X - CLASS_ENC marked present",
                    pTarget->getAttr<ATTR_HUID>());

                // on to the next target
                continue;
            }

            bool chipPresent = true;
            bool chipFunctional = true;
            uint32_t errlEid = 0;
            uint16_t pgData[VPD_CP00_PG_DATA_LENGTH / sizeof(uint16_t)];
            bzero(pgData, sizeof(pgData));

            if (pTarget->getAttr<ATTR_CLASS>() == CLASS_CHIP)
            {
                // read Chip ID/EC data from these physical chips
                errl = platReadIDEC(pTarget);

                if (errl)
                {   // read of ID/EC failed even tho we THOUGHT we were present.
                    HWAS_INF("pTarget %.8X - read IDEC failed (eid 0x%X) - bad",
                        errl->eid(), pTarget->getAttr<ATTR_HUID>());
                    // chip NOT present and NOT functional, so that FSP doesn't
                    // include this for HB to process
                    chipPresent = false;
                    chipFunctional = false;
                    errlEid = errl->eid();

                    // commit the error but keep going
                    errlCommit(errl, HWAS_COMP_ID);
                    // errl is now NULL
                }
                else if (pTarget->getAttr<ATTR_TYPE>() == TYPE_PROC)
                {
                    // read partialGood vector from these as well.
                    errl = platReadPartialGood(pTarget, pgData);

                    if (errl)
                    {   // read of PG failed even tho we were present..
                        HWAS_INF("pTarget %.8X - read PG failed (eid 0x%X)- bad",
                            errl->eid(), pTarget->getAttr<ATTR_HUID>());
                        chipFunctional = false;
                        errlEid = errl->eid();

                        // commit the error but keep going
                        errlCommit(errl, HWAS_COMP_ID);
                        // errl is now NULL
                    }
                    else
                    // look at the 'nest' logic to override the functionality
                    //  of this proc
                    if (pgData[VPD_CP00_PG_PIB_INDEX] !=
                                    VPD_CP00_PG_PIB_GOOD)
                    {
                        HWAS_INF("pTarget %.8X - PIB pgPdata[%d]: expected 0x%04X - bad",
                            pTarget->getAttr<ATTR_HUID>(),
                            VPD_CP00_PG_PIB_INDEX,
                            VPD_CP00_PG_PIB_GOOD);
                        chipFunctional = false;
                    }
                    else
                    if (pgData[VPD_CP00_PG_PERVASIVE_INDEX] !=
                                    VPD_CP00_PG_PERVASIVE_GOOD)
                    {
                        HWAS_INF("pTarget %.8X - Pervasive pgPdata[%d]: expected 0x%04X - bad",
                            pTarget->getAttr<ATTR_HUID>(),
                            VPD_CP00_PG_PERVASIVE_INDEX,
                            VPD_CP00_PG_PERVASIVE_GOOD);
                        chipFunctional = false;
                    }
                    else
                    if ((pgData[VPD_CP00_PG_POWERBUS_INDEX] &
                             VPD_CP00_PG_POWERBUS_BASE) !=
                                    VPD_CP00_PG_POWERBUS_BASE)
                    {
                        HWAS_INF("pTarget %.8X - PowerBus pgPdata[%d]: expected 0x%04X - bad",
                            pTarget->getAttr<ATTR_HUID>(),
                            VPD_CP00_PG_POWERBUS_INDEX,
                            VPD_CP00_PG_POWERBUS_BASE);
                        chipFunctional = false;
                    }
                    else
                    {
                        // read the PR keywords that we need, so that if
                        //  we have errors, we can handle them as approprite.
                        uint8_t prData[VPD_VINI_PR_DATA_LENGTH/sizeof(uint8_t)];
                        bzero(prData, sizeof(prData));
                        errl = platReadPR(pTarget, prData);
                        if (errl != NULL)
                        {   // read of PR keyword failed
                            HWAS_INF("pTarget %.8X - read PR failed - bad",
                                pTarget->getAttr<ATTR_HUID>());
                            chipFunctional = false;
                            errlEid = errl->eid();

                            // commit the error but keep going
                            errlCommit(errl, HWAS_COMP_ID);
                            // errl is now NULL
                        }
                        else
                        {
                            // save info so that we can
                            //  process the PR keyword after this loop
                            HWAS_INF("pTarget %.8X - pushing to procPRlist; FRU_ID %d",
                                pTarget->getAttr<ATTR_HUID>(),
                                pTarget->getAttr<ATTR_FRU_ID>());
                            l_procEntry.target = pTarget;
                            l_procEntry.group = pTarget->getAttr<ATTR_FRU_ID>();
                            l_procEntry.procs =
                                        (prData[7] & VPD_VINI_PR_B7_MASK) + 1;
                            l_procEntry.maxEXs = l_procEntry.procs *
                                        (prData[2] & VPD_VINI_PR_B2_MASK)
                                            >> VPD_VINI_PR_B2_SHIFT;
                            l_procPRList.push_back(l_procEntry);

                            if (l_procEntry.maxEXs == 0)
                            {
                                // this is PROBABLY bad PR, so YELL...
                                HWAS_ERR("pTarget %.8X - PR VPD says 0 CORES",
                                    pTarget->getAttr<ATTR_HUID>());
                            }
                        }
                    }
                } // TYPE_PROC
            } // CLASS_CHIP

            HWAS_DBG("pTarget %.8X - detected present, %sfunctional",
                pTarget->getAttr<ATTR_HUID>(),
                chipFunctional ? "" : "NOT ");

            // now need to mark all of this target's
            //  physical descendants as present and functional as appropriate
            TargetHandleList pDescList;
            targetService().getAssociated( pDescList, pTarget,
                TargetService::CHILD, TargetService::ALL);
            for (TargetHandleList::const_iterator pDesc_it = pDescList.begin();
                    pDesc_it != pDescList.end();
                    ++pDesc_it)
            {
                TargetHandle_t pDesc = *pDesc_it;
                // by default, the descendant's functionality is 'inherited'
                bool descFunctional = chipFunctional;

                if (chipFunctional)
                {   // if the chip is functional, the look through the
                    //  partialGood vector to see if its chiplets
                    //  are functional
                    if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_XBUS) &&
                        (pgData[VPD_CP00_PG_XBUS_INDEX] !=
                            VPD_CP00_PG_XBUS_GOOD))
                    {
                        HWAS_INF("pDesc %.8X - XBUS  pgPdata[%d]: expected 0x%04X - bad",
                            pDesc->getAttr<ATTR_HUID>(),
                            VPD_CP00_PG_XBUS_INDEX,
                            VPD_CP00_PG_XBUS_GOOD);
                        descFunctional = false;
                    }
                    else
                    if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_ABUS) &&
                        (pgData[VPD_CP00_PG_ABUS_INDEX] !=
                            VPD_CP00_PG_ABUS_GOOD))
                    {
                        HWAS_INF("pDesc %.8X - ABUS pgPdata[%d]: expected 0x%04X - bad",
                            pDesc->getAttr<ATTR_HUID>(),
                            VPD_CP00_PG_ABUS_INDEX,
                            VPD_CP00_PG_ABUS_GOOD);
                        descFunctional = false;
                    }
                    else
                    if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_PCI) &&
                        (pgData[VPD_CP00_PG_PCIE_INDEX] !=
                            VPD_CP00_PG_PCIE_GOOD))
                    {
                        HWAS_INF("pDesc %.8X - PCIe pgPdata[%d]: expected 0x%04X - bad",
                            pDesc->getAttr<ATTR_HUID>(),
                            VPD_CP00_PG_PCIE_INDEX,
                            VPD_CP00_PG_PCIE_GOOD);
                        descFunctional = false;
                    }
                    else
                    if ((pDesc->getAttr<ATTR_TYPE>() == TYPE_EX) ||
                        (pDesc->getAttr<ATTR_TYPE>() == TYPE_CORE)
                       )
                    {
                      ATTR_CHIP_UNIT_type indexEX =
                                pDesc->getAttr<ATTR_CHIP_UNIT>();
                      if (pgData[VPD_CP00_PG_EX0_INDEX + indexEX] !=
                            VPD_CP00_PG_EX0_GOOD)
                      {
                        HWAS_INF("pDesc %.8X - CORE/EX%d pgPdata[%d]: expected 0x%04X - bad",
                            pDesc->getAttr<ATTR_HUID>(), indexEX,
                            VPD_CP00_PG_EX0_INDEX + indexEX,
                            VPD_CP00_PG_EX0_GOOD);
                        descFunctional = false;
                      }
                    }
                    else
                    if (pDesc->getAttr<ATTR_TYPE>() == TYPE_MCS)
                    {
                      ATTR_CHIP_UNIT_type indexMCS =
                                pDesc->getAttr<ATTR_CHIP_UNIT>();
                      // check: MCS 0..3 in MCL, MCS 4..7 in MCR
                      if (((indexMCS >=0) && (indexMCS <=3)) &&
                          ((pgData[VPD_CP00_PG_POWERBUS_INDEX] &
                            VPD_CP00_PG_POWERBUS_MCL) == 0))
                      {
                        HWAS_INF("pDesc %.8X - MCS%d pgPdata[%d]: MCL expected 0x%04X - bad",
                            pDesc->getAttr<ATTR_HUID>(), indexMCS,
                            VPD_CP00_PG_POWERBUS_INDEX,
                            VPD_CP00_PG_POWERBUS_MCL);
                        descFunctional = false;
                      }
                      else
                      if (((indexMCS >=4) && (indexMCS <=7)) &&
                          ((pgData[VPD_CP00_PG_POWERBUS_INDEX] &
                            VPD_CP00_PG_POWERBUS_MCR) == 0))
                      {
                        HWAS_INF("pDesc %.8X - MCS%d pgPdata[%d]: MCR expected 0x%04X - bad",
                            pDesc->getAttr<ATTR_HUID>(), indexMCS,
                            VPD_CP00_PG_POWERBUS_INDEX,
                            VPD_CP00_PG_POWERBUS_MCR);
                        descFunctional = false;
                      }
                    }
                } // chipFunctional

                // for sub-parts, if it's not functional, it's not present.
                enableHwasState(pDesc, descFunctional, descFunctional,
                                errlEid);
                HWAS_DBG("pDesc %.8X - marked %spresent, %sfunctional",
                    pDesc->getAttr<ATTR_HUID>(),
                    descFunctional ? "" : "NOT ",
                    descFunctional ? "" : "NOT ");
            }

            // set HWAS state to show CHIP is present, functional per above
            enableHwasState(pTarget, chipPresent, chipFunctional, errlEid);

        } // for pTarget_it
Ejemplo n.º 10
0
errlHndl_t runStep(const TargetHandleList & i_targetList)
{
    MDIA_FAST("memory diagnostics entry with %d target(s)",
            i_targetList.size());

    // memory diagnostics ipl step entry point

    errlHndl_t err = 0;

    Globals globals;

    TargetHandle_t top = 0;
    targetService().getTopLevelTarget(top);

    if(top)
    {
        globals.mfgPolicy = top->getAttr<ATTR_MNFG_FLAGS>();

        uint8_t maxMemPatterns =
            top->getAttr<ATTR_RUN_MAX_MEM_PATTERNS>();

        // This registry / attr is the same as the
        // exhaustive mnfg one
        if(maxMemPatterns)
        {
            globals.mfgPolicy |=
              MNFG_FLAG_ENABLE_EXHAUSTIVE_PATTERN_TEST;
        }

        globals.simicsRunning = Util::isSimicsRunning();

        globals.disableScrubs =
         top->getAttr<ATTR_DISABLE_SCRUB_AFTER_PATTERN_TEST>();
    }

    // get the workflow for each target mba passed in.
    // associate each workflow with the target handle.

    WorkFlowAssocMap list;

    TargetHandleList::const_iterator tit;
    DiagMode mode;

    for(tit = i_targetList.begin(); tit != i_targetList.end(); ++tit)
    {
        err = getMbaDiagnosticMode(globals, *tit, mode);

        if(err)
        {
            break;
        }

        err = getMbaWorkFlow(mode, list[*tit], globals);

        if(err)
        {
            break;
        }
    }

    if(!err)
    {
        // set global data
        Singleton<StateMachine>::instance().setGlobals(globals);

        // TODO...run the workflow through the state machine
        err = Singleton<StateMachine>::instance().run(list);
    }

    // ensure threads and pools are shutdown when finished

    doStepCleanup(globals);

    return err;

}
Ejemplo n.º 11
0
errlHndl_t ErrDataService::GenerateSrcPfa( ATTENTION_TYPE i_attnType,
                                           ServiceDataCollector & io_sdc,
                                           bool & o_initiateHwudump,
                                           TargetHandle_t & o_dumpTrgt,
                                           errlHndl_t & o_dumpErrl,
                                           uint32_t & o_dumpErrlActions)
{
    #define PRDF_FUNC "[ErrDataService::GenerateSrcPfa] "

    o_initiateHwudump = false;
    o_dumpTrgt        = NULL;
    o_dumpErrl        = NULL;
    o_dumpErrlActions = 0;

    // First, check if an error log should be committed. Note that there should
    // always be an error log if there was a system or unit checkstop.
    if ( io_sdc.queryDontCommitErrl() &&
         MACHINE_CHECK != i_attnType && !io_sdc.IsUnitCS() )
    {
        // User did not want this error log committed. No need to continue. So
        // delete it and exit.
        delete iv_errl; iv_errl = NULL;
        return NULL;
    }

#ifdef __HOSTBOOT_MODULE
    using namespace ERRORLOG;
    using namespace HWAS;
#else
    uint8_t sdcSaveFlags = SDC_NO_SAVE_FLAGS;
    size_t  sz_uint8    = sizeof(uint8_t);
#endif

    epubProcedureID thisProcedureID;

    bool ForceTerminate = false;
    bool iplDiagMode = false;

    ++iv_serviceActionCounter;

    uint16_t PRD_Reason_Code = 0;

    //**************************************************************
    // Callout loop to set up Reason code and SRC word 9
    //**************************************************************

    // Must go thru callout list to look for RIOPORT procedure callouts,
    // since they require the port info to be in SRC Word 9
    bool HW = false;
    bool SW = false;
    bool SW_High = false;
    bool SecondLevel = false;
    uint32_t SrcWord7 = 0;
    uint32_t SrcWord9 = 0;

    // Should not gard hardware if there is a hardware callout at LOW priority
    // and a symbolic FRU indicating a possibility of a software error at MED or
    // HIGH priority.
    bool sappSwNoGardReq = false, sappHwNoGardReq = false;

    const SDC_MRU_LIST & mruList = io_sdc.getMruList();
    int32_t calloutsPlusDimms = mruList.size();

    for ( SDC_MRU_LIST::const_iterator it = mruList.begin();
          it < mruList.end(); ++it )
    {
        PRDcallout thiscallout = it->callout;

        if ( PRDcalloutData::TYPE_SYMFRU == thiscallout.getType() )
        {
            if ( (SP_CODE     == thiscallout.flatten()) ||
                 (SYS_SW_CODE == thiscallout.flatten()) )
            {
                SW = true;

                if ( MRU_LOW != it->priority )
                {
                    sappSwNoGardReq = true;
                }

                if ( MRU_MED == it->priority )
                {
                    SW_High = true;
                }
            }
            else if ( LEVEL2_SUPPORT == thiscallout.flatten())
            {
                SecondLevel = true;

                if ( MRU_LOW != it->priority )
                {
                    sappSwNoGardReq = true;
                }
            }
        }
        else if ( PRDcalloutData::TYPE_MEMMRU == thiscallout.getType() )
        {
            MemoryMru memMru (thiscallout.flatten());
            SrcWord9 = memMru.toUint32(); // Get MemMru value

            TargetHandleList partList = memMru.getCalloutList();
            uint32_t partCount = partList.size();

            calloutsPlusDimms = calloutsPlusDimms + partCount -1;
            HW = true; //hardware callout

            if ( MRU_LOW == it->priority )
            {
                sappHwNoGardReq = true;
            }
        }
        else // PRDcalloutData::TYPE_TARGET
        {
            HW = true; // Hardware callout

            // Determines if all the hardware callouts have low priority.

            if ( MRU_LOW == it->priority )
            {
                sappHwNoGardReq = true;
            }
        }
    }

    ////////////////////////////////////////////////////////////////
    //Set the PRD Reason Code based on the flags set in the above callout loop.
    ////////////////////////////////////////////////////////////////

    if (HW == true && SW == true)
    {
        if (SW_High == true)
            PRD_Reason_Code = PRDF_DETECTED_FAIL_SOFTWARE_PROBABLE;
        else
            PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE;
    }
    else if (HW == true && SW == false && SecondLevel == true)
        PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE;
    else if (HW == true && SW == false && SecondLevel == false)
        PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE;
    else if (HW == false && SW == true)
        PRD_Reason_Code = PRDF_DETECTED_FAIL_SOFTWARE;
    else
    {
        // If we get here both HW and SW flags were false. Callout may be
        // Second Level Support only, or a procedure not checked in the SW
        // flag code.
        PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE;
    }

    SrcWord7  = io_sdc.getPrimaryAttnType() << 8;
    SrcWord7 |= io_sdc.getSecondaryAttnType();

    //--------------------------------------------------------------------------
    // Check for IPL Diag Mode
    //--------------------------------------------------------------------------

    #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME)

    iplDiagMode = PlatServices::isInMdiaMode();

    #endif

    //**************************************************************
    // Update Error Log with SRC
    //**************************************************************
    ErrorSignature * esig = io_sdc.GetErrorSignature();

    updateSrc( esig->getChipId(), SrcWord7, esig->getSigId(),
               SrcWord9, PRD_Reason_Code);

    //**************************************************************
    //  Add SDC Capture data to Error Log User Data here only if
    //    there are 4 or more callouts,
    //    (including Dimm callouts in the MemoryMru).
    //**************************************************************
    bool capDataAdded = false;
    if (calloutsPlusDimms > 3)
    {
        AddCapData( io_sdc.GetCaptureData(),    iv_errl );
        AddCapData( io_sdc.getTraceArrayData(), iv_errl );
        capDataAdded = true;
    }

    //--------------------------------------------------------------------------
    // Set the error log severity and get the error log action flags.
    //--------------------------------------------------------------------------

    // Let's assume the default is the action for a system checkstop.

    #ifdef __HOSTBOOT_MODULE
    errlSeverity_t errlSev = ERRL_SEV_UNRECOVERABLE;
    #else
    errlSeverity   errlSev = ERRL_SEV_UNRECOVERABLE;
    #endif

    uint32_t errlAct = ERRL_ACTION_SA        | // Service action required.
                       ERRL_ACTION_REPORT    | // Report to HMC and hypervisor.
                       ERRL_ACTION_CALL_HOME;  // Call home.

    if ( MACHINE_CHECK != i_attnType ) // Anything other that a system checkstop
    {
        if ( io_sdc.queryServiceCall() ) // still a serviceable event
        {
            errlSev = ERRL_SEV_PREDICTIVE;
        }
        else // not a serviceable event
        {
            errlSev = io_sdc.queryLogging()
                            ? ERRL_SEV_RECOVERED      // should still be logged
                            : ERRL_SEV_INFORMATIONAL; // can be ignored
            errlAct = ERRL_ACTION_HIDDEN;
        }
    }

    // This needs to be done after setting the SRCs otherwise it will be
    // overridden.
    iv_errl->setSev( errlSev );

    // Add procedure callout for SUE attentions. The intent is to make sure the
    // customer looks for other service actions before replacing parts for this
    // attention.
    if ( io_sdc.IsSUE() )
    {
        PRDF_HW_ADD_PROC_CALLOUT( SUE_PREV_ERR, MRU_HIGH, iv_errl, errlSev );
    }

    //--------------------------------------------------------------------------
    // Get the global gard policy.
    //--------------------------------------------------------------------------

    HWAS::GARD_ErrorType gardPolicy = HWAS::GARD_NULL;

    // Gard only if the error is a serviceable event.
    if ( io_sdc.queryServiceCall() )
    {
        // We will not Resource Recover on a checkstop attention.
        gardPolicy = ( MACHINE_CHECK == i_attnType ) ? HWAS::GARD_Fatal
                                                     : HWAS::GARD_Predictive;
    }

    if ( io_sdc.IsSUE() && ( MACHINE_CHECK == i_attnType ) )
    {
        // If we are logging an error for an SUE consumed, we should not
        // perform any GARD here. Appropriate resources should have already
        // been GARDed for the original UE.
        gardPolicy = HWAS::GARD_NULL;
    }

    // Apply special policies for OPAL.
    if ( isHyprConfigOpal() &&                          // OPAL is used
         !isMfgAvpEnabled() && !isMfgHdatAvpEnabled() ) // No AVPs running
    {
        // OPAL has requested that we disable garding for predictive errors
        // found at runtime.
        if ( HWAS::GARD_Predictive == gardPolicy )
        {
            #if !defined(__HOSTBOOT_MODULE) // FSP only

            if ( isHyprRunning() ) gardPolicy = HWAS::GARD_NULL;

            #elif defined(__HOSTBOOT_RUNTIME) // HBRT only

            gardPolicy = HWAS::GARD_NULL;

            #endif
        }
        // OPAL has requested that we diable garding for fatal errors (system
        // checkstops) that could have been caused by a software generated
        // attention at runtime. This will be determined if there is a software
        // callout with higher priority than a hardware callout.
        else if ( HWAS::GARD_Fatal == gardPolicy &&
                  sappSwNoGardReq && sappHwNoGardReq ) // Gard requirements met
        {
            #if !defined(__HOSTBOOT_MODULE) // FSP only

            if ( isHyprRunning() ) gardPolicy = HWAS::GARD_NULL;

            #elif !defined(__HOSTBOOT_RUNTIME) // Hostboot only

                #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS

                // Checkstop analysis is only done at the beginning of the IPL,
                // regardless if the checkstop actually occurred during the IPL
                // or at runtime. We will need to check the IPL state in FIR
                // data to determine when the checkstop occurred.

                // Get access to IPL state info from the FIR data in the PNOR.
                if ( !(PnorFirDataReader::getPnorFirDataReader().isIplState()) )
                    gardPolicy = HWAS::GARD_NULL;

                #endif

            #endif
        }
    }

    //--------------------------------------------------------------------------
    // Get the global deconfig policy (must be done after setting gard policy).
    //--------------------------------------------------------------------------

    HWAS::DeconfigEnum deconfigPolicy = HWAS::NO_DECONFIG;
    bool               deferDeconfig  = false;

    if ( HWAS::GARD_NULL != gardPolicy )
    {
        #if !defined(__HOSTBOOT_MODULE) // FSP only

        // Change the deconfig state based the gard type. This is only required
        // to control what the FSP does during the reconfig loop.
        deconfigPolicy = HWSV::SvrError::isInHwReconfLoop() ? HWAS::DECONFIG
                                                            : HWAS::NO_DECONFIG;

        #elif !defined(__HOSTBOOT_RUNTIME) // Hostboot only

        // Deferred Deconfig should be used throughout all of Hostboot (both
        // checkForIplAttns() and MDIA).
        deconfigPolicy = HWAS::DECONFIG;
        deferDeconfig  = true;

        #endif
    }

    //--------------------------------------------------------------------------
    // Get the HCDB diagnostics policy.
    //--------------------------------------------------------------------------

    HWSV::hwsvDiagUpdate l_diagUpdate = HWSV::HWSV_DIAG_NEEDED;
    if ( ERRL_ACTION_HIDDEN == errlAct )
    {
        // Diagnostics is not needed in the next IPL cycle for non-visible logs.
        l_diagUpdate = HWSV::HWSV_DIAG_NOT_NEEDED;
    }

    //--------------------------------------------------------------------------
    // Initialize the PFA data
    //--------------------------------------------------------------------------

    PfaData pfaData;

    initPfaData( io_sdc, i_attnType, deferDeconfig, errlAct, errlSev,
                 gardPolicy, pfaData, o_dumpTrgt );

    //--------------------------------------------------------------------------
    // Add each mru/callout to the error log.
    //--------------------------------------------------------------------------

    for ( SDC_MRU_LIST::const_iterator it = mruList.begin();
          it < mruList.end(); ++it )
    {
        PRDcallout  thiscallout  = it->callout;
        PRDpriority thispriority = it->priority;

        // Use the global gard/deconfig policies as default.
        HWAS::GARD_ErrorType thisGard     = gardPolicy;
        HWAS::DeconfigEnum   thisDeconfig = deconfigPolicy;

        // Change the gard/deconfig actions if this MRU should not be garded.
        if ( NO_GARD == it->gardState )
        {
            thisGard     = HWAS::GARD_NULL;
            thisDeconfig = HWAS::NO_DECONFIG;
        }

        // Add the callout to the PFA data
        addCalloutToPfaData( pfaData, thiscallout, thispriority, thisGard );

        // Add callout based on callout type.
        if( PRDcalloutData::TYPE_TARGET == thiscallout.getType() )
        {
            PRDF_HW_ADD_CALLOUT(thiscallout.getTarget(),
                                thispriority,
                                thisDeconfig,
                                iv_errl,
                                thisGard,
                                errlSev,
                                l_diagUpdate);
        }
        else if(PRDcalloutData::TYPE_PROCCLK == thiscallout.getType() ||
                PRDcalloutData::TYPE_PCICLK  == thiscallout.getType())
        {
            PRDF_ADD_CLOCK_CALLOUT(iv_errl,
                                   thiscallout.getTarget(),
                                   thiscallout.getType(),
                                   thispriority,
                                   thisDeconfig,
                                   thisGard);
        }
        else if ( PRDcalloutData::TYPE_MEMMRU == thiscallout.getType() )
        {
            MemoryMru memMru (thiscallout.flatten());

            TargetHandleList partList = memMru.getCalloutList();
            for ( TargetHandleList::iterator it = partList.begin();
                  it != partList.end(); it++ )
            {
                PRDF_HW_ADD_CALLOUT( *it,
                                     thispriority,
                                     thisDeconfig,
                                     iv_errl,
                                     thisGard,
                                     errlSev,
                                     l_diagUpdate );
            }
        }
        else if ( PRDcalloutData::TYPE_SYMFRU == thiscallout.getType() )
        {
            thisProcedureID = epubProcedureID(thiscallout.flatten());

            PRDF_DTRAC( PRDF_FUNC "thisProcedureID: %x, thispriority: %x, "
                        "errlSev: %x", thisProcedureID, thispriority,errlSev );

            PRDF_HW_ADD_PROC_CALLOUT(thisProcedureID,
                                     thispriority,
                                     iv_errl,
                                     errlSev);

            // Use the flags set earlier to determine if the callout is just
            // Software (SP code or Phyp Code). Add a Second Level Support
            // procedure callout Low, for this case.
            if (HW == false && SW == true && SecondLevel == false)
            {
                PRDF_DTRAC( PRDF_FUNC "thisProcedureID= %x, thispriority=%x, "
                            "errlSev=%x", LEVEL2_SUPPORT, MRU_LOW, errlSev );

                PRDF_HW_ADD_PROC_CALLOUT( LEVEL2_SUPPORT, MRU_LOW, iv_errl,
                                          errlSev );

                SecondLevel = true;
            }
        }
    }

    // Send the dynamic memory Dealloc message for DIMMS for Predictive
    // callouts.
    // We can not check for ERRL severity here as there are some cases
    // e.g. DD02 where we create a Predictive error log but callouts
    // are not predictive.
    if ( HWAS::GARD_Predictive == gardPolicy )
    {
        deallocateDimms( mruList );
    }

    //**************************************************************
    // Check for Terminating the system for non mnfg conditions.
    //**************************************************************

    ForceTerminate = checkForceTerm( io_sdc, o_dumpTrgt, pfaData );

    //*************************************************************
    // Check for Manufacturing Mode terminate here and then do
    // the needed overrides on ForceTerminate flag.
    //*************************************************************
    if ( PlatServices::mnfgTerminate() && !ForceTerminate )
    {
        ForceTerminate = true;
        if ( !((errlSev == ERRL_SEV_RECOVERED) ||
               (errlSev == ERRL_SEV_INFORMATIONAL)) &&
             iplDiagMode  &&
             !HW )
        {
            //Terminate in Manufacturing Mode, in IPL mode, for visible log, with no HW callouts.
            PRDF_SRC_WRITE_TERM_STATE_ON(iv_errl, SRCI_TERM_STATE_MNFG);
        }
        // Do not terminate if recoverable or informational.
        // Do not terminate if deferred deconfig.
        else if ( deferDeconfig                            ||
                  (errlSev == ERRL_SEV_RECOVERED    ) ||
                  (errlSev == ERRL_SEV_INFORMATIONAL)  )
        {
            ForceTerminate = false;
            errlAct |= ERRL_ACTION_DONT_TERMINATE;
        }
        else
        {
            PRDF_SRC_WRITE_TERM_STATE_ON(iv_errl, SRCI_TERM_STATE_MNFG);
        }

        pfaData.errlActions = errlAct;
    }

    // Needed to move the errl add user data sections here because of some updates
    // of the data required in the Aysnc section for the SMA dual reporting fix.

    //**************************************************************
    // Add the PFA data to Error Log User Data
    //**************************************************************
    UtilMem l_membuf;
    l_membuf << pfaData;
    PRDF_ADD_FFDC( iv_errl, (const char*)l_membuf.base(), l_membuf.size(),
                   ErrlVer1, ErrlSectPFA5_1 );

    //**************************************************************
    // Add SDC Capture data to Error Log User Data
    //**************************************************************
    // Pulled some code out to incorporate into AddCapData
    // Check to make sure Capture Data wasn't added earlier.
    if (!capDataAdded)
    {
        AddCapData( io_sdc.GetCaptureData(),    iv_errl );
        AddCapData( io_sdc.getTraceArrayData(), iv_errl );
    }

    //**************************************************************************
    // Add extended MemoryMru error log sections (if needed).
    //**************************************************************************

    for ( SDC_MRU_LIST::const_iterator it = mruList.begin();
          it < mruList.end(); ++it )
    {
        // Operate only on MemoryMru callouts.
        if ( PRDcalloutData::TYPE_MEMMRU != it->callout.getType() ) continue;

/* TODO RTC 136125
        // Only add single DIMM callouts. Otherwise, the parsed data is
        // redundant.
        MemoryMru memMru ( it->callout.flatten() );
        if ( !memMru.getSymbol().isValid() ) continue;

        // Add the MemoryMru to the capture data.
        CenMbaCaptureData::addExtMemMruData( memMru, iv_errl );
*/
    }

    //**************************************************************************
    // Additional FFDC
    //**************************************************************************

    // For OP checkstop analysis, add a string indicating a system checkstop
    // occurred and when. This will be printed out in the console traces along
    // with the error log.
    #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) // IPL only
    #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS

    if ( MACHINE_CHECK == i_attnType )
    {
        const char * const str =
            PnorFirDataReader::getPnorFirDataReader().isIplState()
                ? "System checkstop occurred during IPL on previous boot"
                : "System checkstop occurred during runtime on previous boot";

        ErrlUserDetailsString(str).addToLog(iv_errl);
    }

    #endif
    #endif

    // Collect PRD traces.
    // NOTE: Each line of a trace is on average 36 bytes so 768 bytes should get
    //       us around 21 lines of trace output.
    PRDF_COLLECT_TRACE(iv_errl, 768);

    //**************************************************************
    // Commit the error log.
    // This will also perform Gard and Deconfig actions.
    // Do the Unit Dumps if needed.
    //**************************************************************

    // Add the MNFG trace information.
    MnfgTrace( io_sdc.GetErrorSignature(), pfaData );

    // If this is not a terminating condition, commit the error log. If the
    // error log is not committed, the error log will be passed back to
    // PRDF::main() and eventually ATTN.
    if ( MACHINE_CHECK != pfaData.priAttnType && !ForceTerminate &&
         !pfaData.TERMINATE )
    {
        // Handle any unit checkstop conditions, if needed (i.e. runtime
        // deconfiguration, dump/FFDC collection, etc.
        if ( io_sdc.IsUnitCS() && !io_sdc.IsUsingSavedSdc() )
        {
            handleUnitCS( io_sdc, o_dumpTrgt, o_initiateHwudump );
        }

        if ( true == o_initiateHwudump )
        {
            // the dump log will be deleted later in PRDF::main
            // after the hwudump is initiated there.
            o_dumpErrl = iv_errl;
            iv_errl = NULL;
            o_dumpErrlActions = errlAct;
            PRDF_TRAC( PRDF_FUNC "for target: 0x%08x, i_errl: 0x%08x, "
                       "i_errlActions: 0x%08x", getHuid(o_dumpTrgt),
                       ERRL_GETRC_SAFE(o_dumpErrl), o_dumpErrlActions );
        }
        else
        {
            // Commit the error log.
            commitErrLog( iv_errl, pfaData );
        }
    }

#ifndef __HOSTBOOT_MODULE
    errlHndl_t reg_errl = UtilReg::read ("prdf/RasServices", &sdcSaveFlags, sz_uint8);
    if (reg_errl)
    {
        PRDF_ERR( PRDF_FUNC "Failure in SDC Sync flag Registry read" );
        PRDF_COMMIT_ERRL(reg_errl, ERRL_ACTION_REPORT);
    }
    else
    {
        //Turn off indicator that there is saved Sdc Analysis info
        sdcSaveFlags &= ( ~SDC_ANALYSIS_SAVE_FLAG );
        reg_errl = UtilReg::write ("prdf/RasServices", &sdcSaveFlags, sz_uint8);
        if (reg_errl)
        {
            PRDF_ERR( PRDF_FUNC "Failure in SDC Sync flag Registry write" );
            PRDF_COMMIT_ERRL(reg_errl, ERRL_ACTION_REPORT);
        }
    }
#endif

    PRDF_INF( PRDF_FUNC "PRD called to analyze an error: 0x%08x 0x%08x",
              esig->getChipId(), esig->getSigId() );

    // Reset iv_errl to NULL. This is done to catch logical bug in our code.
    // It enables us to assert in createInitialErrl function if iv_errl is
    // not NULL which should catch any logical bug in initial stages of testing.
    errlHndl_t o_errl = iv_errl;
    iv_errl = NULL;

    return o_errl;

    #undef PRDF_FUNC
}
Ejemplo n.º 12
0
    /**
     * @brief Stops OCCs on all Processors in the node
     */
    errlHndl_t stopAllOCCs()
    {
        TRACFCOMP( g_fapiTd,ENTER_MRK"stopAllOCCs" );
        errlHndl_t l_errl    = NULL;
        bool winkle_loaded = false;
        do {

#ifndef __HOSTBOOT_RUNTIME
            //OCC requires the build_winkle_images library
            if (  !VFS::module_is_loaded( "libbuild_winkle_images.so" ) )
            {
                l_errl = VFS::module_load( "libbuild_winkle_images.so" );

                if ( l_errl )
                {
                    //  load module returned with errl set
                    TRACFCOMP( g_fapiTd,ERR_MRK"loadnStartAllOccs: Could not load build_winkle module" );
                    // break from do loop if error occured
                    break;
                }
                winkle_loaded = true;
            }
#endif


            TargetHandleList procChips;
            getAllChips(procChips, TYPE_PROC, true);

            if(procChips.size() == 0)
            {
                TRACFCOMP( g_fapiTd,INFO_MRK"loadnStartAllOccs: No processors found" );
                //We'll never get this far in the IPL without any processors,
                // so just exit.
                break;
            }

            TRACFCOMP( g_fapiTd,
                       INFO_MRK"loadnStartAllOccs: %d procs found",
                       procChips.size());

            //The OCC Procedures require processors within a DCM be
            //setup together.  If DCM installed is set, we work under
            //the assumption that each nodeID is a DCM.  So sort the
            //list by NodeID then call OCC Procedures on NodeID pairs.
            std::sort(procChips.begin(),
                      procChips.end(),
                      orderByNodeAndPosition);

            //The OCC master for the node must be reset last.  For all
            //OP systems there is only a single OCC that can be the
            //master so it is safe to look at the MASTER_CAPABLE flag.
            Target* masterProc0 = NULL;
            Target* masterProc1 = NULL;

            TargetHandleList::iterator itr1 = procChips.begin();

            if(0 == (*itr1)->getAttr<ATTR_PROC_DCM_INSTALLED>())
            {
                TRACUCOMP( g_fapiTd,
                       INFO_MRK"stopAllOCCs: non-dcm path entered");

                for (TargetHandleList::iterator itr = procChips.begin();
                     itr != procChips.end();
                     ++itr)
                {
                    TargetHandleList pOccs;
                    getChildChiplets(pOccs, *itr, TYPE_OCC);
                    if (pOccs.size() > 0)
                    {
                        if( pOccs[0]->getAttr<ATTR_OCC_MASTER_CAPABLE>() )
                        {
                            masterProc0 = *itr;
                            continue;
                        }
                    }

                    l_errl = HBOCC::stopOCC( *itr, NULL );
                    if (l_errl)
                    {
                        TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed");
                        errlCommit (l_errl, HWPF_COMP_ID);
                        // just commit and try the next chip
                    }
                }
                if (l_errl)
                {
                    break;
                }
            }
            else
            {
                TRACFCOMP( g_fapiTd,
                           INFO_MRK"stopAllOCCs: Following DCM Path");

                for (TargetHandleList::iterator itr = procChips.begin();
                     itr != procChips.end();
                     ++itr)
                {
                    Target* targ0 = *itr;
                    Target* targ1 = NULL;

                    TRACFCOMP( g_fapiImpTd, INFO_MRK"stopAllOCCs: Cur target nodeID=%d",
                               targ0->getAttr<ATTR_FABRIC_NODE_ID>());

                    //if the next target in the list is in the same node
                    // they are on the same DCM, so bump itr forward
                    // and update targ0 pointer
                    if((itr+1) != procChips.end())
                    {
                        TRACFCOMP( g_fapiImpTd, INFO_MRK"stopAllOCCs: n+1 target nodeID=%d", ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>()));

                        if((targ0->getAttr<ATTR_FABRIC_NODE_ID>()) ==
                           ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>()))
                        {
                            //need to flip the numbers because we were reversed
                            targ1 = targ0;
                            itr++;
                            targ0 = *itr;
                        }
                    }

                    TargetHandleList pOccs;
                    getChildChiplets(pOccs, targ0, TYPE_OCC);
                    if (pOccs.size() > 0)
                    {
                        if( pOccs[0]->getAttr<ATTR_OCC_MASTER_CAPABLE>() )
                        {
                            masterProc0 = targ0;
                            masterProc1 = targ1;
                            continue;
                        }
                    }

                    l_errl = HBOCC::stopOCC( targ0, targ1 );
                    if (l_errl)
                    {
                        TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed");
                        errlCommit (l_errl, HWPF_COMP_ID);
                        // just commit and try the next module
                    }
                }
                if (l_errl)
                {
                    break;
                }
            }

            //now do the master OCC
            if( masterProc0 )
            {
                l_errl = HBOCC::stopOCC( masterProc0, masterProc1 );
                if (l_errl)
                {
                    TRACFCOMP( g_fapiImpTd, ERR_MRK"stopAllOCCs: stop failed on master");
                    break;
                }
            }
        } while(0);

        //make sure we always unload the module if we loaded it
        if (winkle_loaded)
        {
#ifndef __HOSTBOOT_RUNTIME
            errlHndl_t l_tmpErrl =
              VFS::module_unload( "libbuild_winkle_images.so" );
            if ( l_tmpErrl )
            {
                TRACFCOMP( g_fapiTd,ERR_MRK"stopAllOCCs: Error unloading build_winkle module" );
                if(l_errl)
                {
                    errlCommit( l_tmpErrl, HWPF_COMP_ID );
                }
                else
                {
                    l_errl = l_tmpErrl;
                }
            }
#endif
        }

        TRACFCOMP( g_fapiTd,EXIT_MRK"stopAllOCCs" );
        return l_errl;
    }
Ejemplo n.º 13
0
    /**
     * @brief Starts OCCs on all Processors in the node
     *        This is intended to be used for AVP testing.
     *
     * @param[out] o_failedOccTarget: Pointer to the target failing
     *                       loadnStartAllOccs
     * @param[in] i_useSRAM: bool - use SRAM for OCC image, ie during IPL
     *     true if during IPL, false if at end of IPL (default)
     * @return errlHndl_t  Error log if OCC load failed
     */
    errlHndl_t loadnStartAllOccs(TARGETING::Target *& o_failedOccTarget,
                                    bool i_useSRAM)
    {
        errlHndl_t  l_errl  =   NULL;
        void* homerVirtAddrBase = NULL;
        uint64_t homerPhysAddrBase = VMM_HOMER_REGION_START_ADDR;
        bool winkle_loaded = false;

        TRACUCOMP( g_fapiTd,
                   ENTER_MRK"loadnStartAllOccs(%d)", i_useSRAM);

        do {
#ifndef __HOSTBOOT_RUNTIME
            //OCC requires the build_winkle_images library
            if (  !VFS::module_is_loaded( "libbuild_winkle_images.so" ) )
            {
                l_errl = VFS::module_load( "libbuild_winkle_images.so" );

                if ( l_errl )
                {
                    //  load module returned with errl set
                    TRACFCOMP( g_fapiTd,ERR_MRK"loadnStartAllOccs: Could not load build_winkle module" );
                    // break from do loop if error occured
                    break;
                }
                winkle_loaded = true;
            }

            assert(VMM_HOMER_REGION_SIZE <= THIRTYTWO_GB,
                   "loadnStartAllOccs: Unsupported HOMER Region size");

            if (!i_useSRAM)
            {
                //If running Sapphire need to place this at the top of memory
                if(TARGETING::is_sapphire_load())
                {
                    homerPhysAddrBase = TARGETING::get_top_mem_addr();
                    assert (homerPhysAddrBase != 0,
                            "loadnStartAllOccs: Top of memory was 0!");
                    homerPhysAddrBase -= VMM_ALL_HOMER_OCC_MEMORY_SIZE;
                }
                TRACFCOMP( g_fapiTd, "HOMER is at %.16X", homerPhysAddrBase );

                //Map entire homer region into virtual memory
                homerVirtAddrBase =
                  mm_block_map(reinterpret_cast<void*>(homerPhysAddrBase),
                               VMM_HOMER_REGION_SIZE);
                TRACFCOMP( g_fapiTd, "HOMER virtaddrbase %.16X", homerVirtAddrBase );
            }
            else
            {
                // malloc space big enough for all of OCC
                homerVirtAddrBase = (void *)malloc(1 * MEGABYTE);
                homerPhysAddrBase = mm_virt_to_phys(homerVirtAddrBase);
            }
#endif

            if (i_useSRAM)
            {
                // OCC is going into L3 and SRAM so only need 1 prime and load
                // into the Master Proc
                TargetService & tS = targetService();
                Target * sysTarget = NULL;
                tS.getTopLevelTarget( sysTarget );
                assert( sysTarget != NULL );
                Target* masterproc = NULL;
                tS.masterProcChipTargetHandle( masterproc );

                /******* SETUP AND LOAD **************/
                l_errl =  primeAndLoadOcc   (masterproc,
                                             homerVirtAddrBase,
                                             homerPhysAddrBase,
                                             i_useSRAM);
                if(l_errl)
                {
                    o_failedOccTarget = masterproc;
                    TRACFCOMP( g_fapiImpTd, ERR_MRK
                     "loadnStartAllOccs:primeAndLoadOcc failed");
                    free(homerVirtAddrBase);
                    break;
                }

                /********* START OCC *************/
                l_errl = HBOCC::startOCC (masterproc, NULL, o_failedOccTarget);

                // it either started or errored; either way, free the memory
                free(homerVirtAddrBase);

                if (l_errl)
                {
                    TRACFCOMP( g_fapiImpTd, ERR_MRK"loadnStartAllOccs: startOCC failed");
                    break;
                }
            }
            else
            {

            TargetHandleList procChips;
            getAllChips(procChips, TYPE_PROC, true);

            if(procChips.size() == 0)
            {
                TRACFCOMP( g_fapiTd,INFO_MRK"loadnStartAllOccs: No processors found" );
                //We'll never get this far in the IPL without any processors,
                // so just exit.
                break;
            }

            TRACUCOMP( g_fapiTd,
                       INFO_MRK"loadnStartAllOccs: %d procs found",
                       procChips.size());

            TargetHandleList::iterator itr1 = procChips.begin();
            //The OCC Procedures require processors within a DCM be
            //setup together.  So, first checking if any proc has
            //DCM installed attribute set.  If not, we can iterate
            //over the list in any order.

            //If DCM installed is set, we work under the assumption
            //that each nodeID is a DCM.  So sort the list by NodeID
            //then call OCC Procedures on NodeID pairs.
            if(0 ==
               (*itr1)->getAttr<ATTR_PROC_DCM_INSTALLED>())
            {
                TRACUCOMP( g_fapiTd,
                       INFO_MRK"loadnStartAllOccs: non-dcm path entered");

                for (TargetHandleList::iterator itr = procChips.begin();
                     itr != procChips.end();
                     ++itr)
                {
                    /******* SETUP AND LOAD **************/
                    l_errl =  primeAndLoadOcc   (*itr,
                                                 homerVirtAddrBase,
                                                 homerPhysAddrBase,
                                                 i_useSRAM);
                    if(l_errl)
                    {
                        o_failedOccTarget = *itr;
                        TRACFCOMP( g_fapiImpTd, ERR_MRK
                         "loadnStartAllOccs:primeAndLoadOcc failed");
                        break;
                    }

                    /********* START OCC *************/
                    l_errl = HBOCC::startOCC (*itr, NULL, o_failedOccTarget);
                    if (l_errl)
                    {
                        TRACFCOMP( g_fapiImpTd, ERR_MRK"loadnStartAllOccs: startOCC failed");
                        break;
                    }
                }
                if (l_errl)
                {
                    break;
                }

            }
            else
            {
                TRACUCOMP( g_fapiTd,
                           INFO_MRK"loadnStartAllOccs: Following DCM Path");

                std::sort(procChips.begin(),
                          procChips.end(),
                          orderByNodeAndPosition);

                TRACUCOMP( g_fapiTd,
                           INFO_MRK"loadnStartAllOccs: procChips list sorted");

                for (TargetHandleList::iterator itr = procChips.begin();
                     itr != procChips.end();
                     ++itr)
                {
                    TRACUCOMP( g_fapiImpTd, INFO_MRK"loadnStartAllOccs: Insepcting first target" );
                    Target* targ0 = *itr;
                    Target* targ1 = NULL;

                    TRACUCOMP( g_fapiImpTd, INFO_MRK
                               "loadnStartAllOccs: Cur target nodeID=%d",
                               targ0->getAttr<ATTR_FABRIC_NODE_ID>());


                    //if the next target in the list is in the same node
                    // they are on the same DCM, so bump itr forward
                    // and update targ1 pointer
                    if((itr+1) != procChips.end())
                    {
                        TRACUCOMP( g_fapiImpTd, INFO_MRK
                                   "loadnStartAllOccs: n+1 target nodeID=%d",
                                   ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>())
                                   );

                        if((targ0->getAttr<ATTR_FABRIC_NODE_ID>()) ==
                           ((*(itr+1))->getAttr<ATTR_FABRIC_NODE_ID>()))
                        {
                            itr++;
                            targ1 = *itr;
                        }
                    }

                    /********** Setup and load targ0 ***********/
                    l_errl =  primeAndLoadOcc   (targ0,
                                                 homerVirtAddrBase,
                                                 homerPhysAddrBase,
                                                 i_useSRAM);
                    if(l_errl)
                    {
                        o_failedOccTarget = targ0;
                        TRACFCOMP( g_fapiImpTd, ERR_MRK
                                   "loadnStartAllOccs: "
                                   "primeAndLoadOcc failed on targ0");
                        break;
                    }

                    /*********** Setup and load targ1 **********/
                    l_errl =  primeAndLoadOcc   (targ1,
                                                 homerVirtAddrBase,
                                                 homerPhysAddrBase,
                                                 i_useSRAM);
                    if(l_errl)
                    {
                        o_failedOccTarget = targ1;
                        TRACFCOMP( g_fapiImpTd, ERR_MRK
                                   "loadnStartAllOccs: "
                                   "primeAndLoadOcc failed on targ1");
                        break;
                    }

                    /*********** Start OCC *******************/
                    l_errl = HBOCC::startOCC (targ0, targ1, o_failedOccTarget);
                    if (l_errl)
                    {
                        TRACFCOMP( g_fapiImpTd, ERR_MRK
                                   "loadnStartAllOccs: start failed");
                        break;
                    }
                }
                if (l_errl)
                {
                    break;
                }
            }
            }
        } while(0);

        errlHndl_t  l_tmpErrl  =   NULL;
//Unless HTMGT is in use, there are no further accesses to HOMER memory
//required during the IPL
#ifndef CONFIG_HTMGT
        if(homerVirtAddrBase)
        {
            int rc = 0;
            rc =  mm_block_unmap(homerVirtAddrBase);
            if (rc != 0)
            {
                /*@
                 * @errortype
                 * @moduleid     fapi::MOD_OCC_LOAD_START_ALL_OCCS
                 * @reasoncode   fapi::RC_MM_UNMAP_ERR
                 * @userdata1    Return Code
                 * @userdata2    Unmap address
                 * @devdesc      mm_block_unmap() returns error
                 * @custdesc    A problem occurred during the IPL
                 *              of the system.
                 */
                l_tmpErrl =
                  new ERRORLOG::ErrlEntry(
                                          ERRORLOG::ERRL_SEV_UNRECOVERABLE,
                                          fapi::MOD_OCC_LOAD_START_ALL_OCCS,
                                          fapi::RC_MM_UNMAP_ERR,
                                          rc,
                                          reinterpret_cast<uint64_t>
                                          (homerVirtAddrBase));
                if(l_tmpErrl)
                {
                    if(l_errl)
                    {
                        errlCommit( l_tmpErrl, HWPF_COMP_ID );
                    }
                    else
                    {
                        l_errl = l_tmpErrl;
                    }
                }
            }
        }
#endif
        //make sure we always unload the module
        if (winkle_loaded)
        {
            l_tmpErrl = VFS::module_unload( "libbuild_winkle_images.so" );
            if ( l_tmpErrl )
            {
                TRACFCOMP
                    ( g_fapiTd,ERR_MRK
                      "loadnStartAllOccs: Error unloading build_winkle module"
                      );
                if(l_errl)
                {
                    errlCommit( l_tmpErrl, HWPF_COMP_ID );
                }
                else
                {
                    l_errl = l_tmpErrl;
                }
            }
        }

        TRACUCOMP( g_fapiTd,
                   EXIT_MRK"loadnStartAllOccs" );

        return l_errl;
    }