// Adds the signature ID from i_sig to the signature list, keyed by the target
// handle that corresponds to the signature's chip ID. If no target handle can
// be found for that chip ID, an error is traced and nothing is added.
void ServiceDataCollector::AddSignatureList( ErrorSignature & i_sig )
{
    #define PRDF_FUNC "[ServiceDataCollector::AddSignatureList] "

    // Translate the chip ID carried by the signature into a target handle.
    TARGETING::TargetHandle_t chipTrgt =
                            PlatServices::getTarget( i_sig.getChipId() );

    if ( NULL == chipTrgt )
    {
        // No target for this chip ID; trace the failure and do not add.
        PRDF_ERR( PRDF_FUNC "Failed to get target Handle for "
                          "chip:0x%08X", i_sig.getChipId() );
    }
    else
    {
        // Hand off to the (target, signature ID) overload.
        AddSignatureList( chipTrgt, i_sig.getSigId() );
    }

    #undef PRDF_FUNC
}
Example #2
0
// Simulator wrapper around ErrDataService::GenerateSrcPfa(). It forwards the
// request to the base implementation and then reports the chip ID and
// signature ID from the collector's error signature to the sim services.
// Returns whatever the base implementation returned.
errlHndl_t SimErrDataService::GenerateSrcPfa(ATTENTION_TYPE attn_type,
                                             ServiceDataCollector & i_sdc)
{
    using namespace TARGETING;
    using namespace PlatServices;

    PRDF_ENTER("SimErrDataService::GenerateSrcPfa()");

    // Delegate to the real RAS services implementation first.
    errlHndl_t o_errl = ErrDataService::GenerateSrcPfa(attn_type, i_sdc);

    // Pass the signature recorded in the collector on to the sim services.
    ErrorSignature * sig = i_sdc.GetErrorSignature();
    getSimServices().reportSig(sig->getChipId(), sig->getSigId());

    PRDF_EXIT("SimErrDataService::GenerateSrcPfa()");

    return o_errl;
}
// Finalizes the error log held in iv_errl for the attention that was analyzed:
// derives the reason code and SRC words from the callout list, sets the
// severity, applies the gard/deconfig policies, adds the callouts, PFA data,
// capture data and traces, and then either commits the log or leaves it for
// the caller.
//
// i_attnType         Attention type being handled. MACHINE_CHECK is treated as
//                    a system checkstop throughout this function.
// io_sdc             Service data collector supplying the MRU list, the error
//                    signature, the capture data and the various query flags.
// o_initiateHwudump  Reset to false on entry. Passed by reference to
//                    handleUnitCS() on the unit checkstop path (presumably set
//                    there when a dump is required -- confirm in
//                    handleUnitCS()).
// o_dumpTrgt         Reset to NULL on entry. Passed to initPfaData(),
//                    checkForceTerm() and handleUnitCS().
// o_dumpErrl         Reset to NULL on entry. Receives iv_errl when
//                    o_initiateHwudump is true on the non-terminating path.
// o_dumpErrlActions  Reset to 0 on entry. Receives the error log action flags
//                    in the same case as o_dumpErrl.
//
// Returns NULL when the log is deleted without being committed or when it was
// handed over through o_dumpErrl; otherwise returns the value iv_errl holds at
// the end of the function. iv_errl is always NULL when this function returns.
errlHndl_t ErrDataService::GenerateSrcPfa( ATTENTION_TYPE i_attnType,
                                           ServiceDataCollector & io_sdc,
                                           bool & o_initiateHwudump,
                                           TargetHandle_t & o_dumpTrgt,
                                           errlHndl_t & o_dumpErrl,
                                           uint32_t & o_dumpErrlActions)
{
    #define PRDF_FUNC "[ErrDataService::GenerateSrcPfa] "

    // Put all output parameters into a known state before any early return.
    o_initiateHwudump = false;
    o_dumpTrgt        = NULL;
    o_dumpErrl        = NULL;
    o_dumpErrlActions = 0;

    // First, check if an error log should be committed. Note that there should
    // always be an error log if there was a system or unit checkstop.
    if ( io_sdc.queryDontCommitErrl() &&
         MACHINE_CHECK != i_attnType && !io_sdc.IsUnitCS() )
    {
        // User did not want this error log committed. No need to continue. So
        // delete it and exit.
        delete iv_errl; iv_errl = NULL;
        return NULL;
    }

#ifdef __HOSTBOOT_MODULE
    using namespace ERRORLOG;
    using namespace HWAS;
#else
    // Only used by the registry read/write near the end of this function.
    uint8_t sdcSaveFlags = SDC_NO_SAVE_FLAGS;
    size_t  sz_uint8    = sizeof(uint8_t);
#endif

    epubProcedureID thisProcedureID;

    bool ForceTerminate = false;
    bool iplDiagMode = false;

    ++iv_serviceActionCounter;

    uint16_t PRD_Reason_Code = 0;

    //**************************************************************
    // Callout loop to set up Reason code and SRC word 9
    //**************************************************************

    // Must go thru callout list to look for RIOPORT procedure callouts,
    // since they require the port info to be in SRC Word 9
    bool HW = false;
    bool SW = false;
    bool SW_High = false;
    bool SecondLevel = false;
    uint32_t SrcWord7 = 0;
    uint32_t SrcWord9 = 0;

    // Should not gard hardware if there is a hardware callout at LOW priority
    // and a symbolic FRU indicating a possibility of a software error at MED or
    // HIGH priority.
    bool sappSwNoGardReq = false, sappHwNoGardReq = false;

    const SDC_MRU_LIST & mruList = io_sdc.getMruList();
    // Starts as the number of MRUs; MemoryMru entries are expanded below.
    int32_t calloutsPlusDimms = mruList.size();

    for ( SDC_MRU_LIST::const_iterator it = mruList.begin();
          it < mruList.end(); ++it )
    {
        PRDcallout thiscallout = it->callout;

        if ( PRDcalloutData::TYPE_SYMFRU == thiscallout.getType() )
        {
            if ( (SP_CODE     == thiscallout.flatten()) ||
                 (SYS_SW_CODE == thiscallout.flatten()) )
            {
                SW = true;

                if ( MRU_LOW != it->priority )
                {
                    sappSwNoGardReq = true;
                }

                // NOTE(review): SW_High is set for MRU_MED priority, not for a
                // HIGH priority -- confirm this is the intended condition.
                if ( MRU_MED == it->priority )
                {
                    SW_High = true;
                }
            }
            else if ( LEVEL2_SUPPORT == thiscallout.flatten())
            {
                SecondLevel = true;

                if ( MRU_LOW != it->priority )
                {
                    sappSwNoGardReq = true;
                }
            }
        }
        else if ( PRDcalloutData::TYPE_MEMMRU == thiscallout.getType() )
        {
            MemoryMru memMru (thiscallout.flatten());
            SrcWord9 = memMru.toUint32(); // Get MemMru value

            TargetHandleList partList = memMru.getCalloutList();
            uint32_t partCount = partList.size();

            // This MRU was already counted once via mruList.size(), so add the
            // number of parts it expands to minus that one entry.
            calloutsPlusDimms = calloutsPlusDimms + partCount -1;
            HW = true; //hardware callout

            if ( MRU_LOW == it->priority )
            {
                sappHwNoGardReq = true;
            }
        }
        else // PRDcalloutData::TYPE_TARGET
        {
            HW = true; // Hardware callout

            // Determines if all the hardware callouts have low priority.

            if ( MRU_LOW == it->priority )
            {
                sappHwNoGardReq = true;
            }
        }
    }

    ////////////////////////////////////////////////////////////////
    //Set the PRD Reason Code based on the flags set in the above callout loop.
    ////////////////////////////////////////////////////////////////

    if (HW == true && SW == true)
    {
        if (SW_High == true)
            PRD_Reason_Code = PRDF_DETECTED_FAIL_SOFTWARE_PROBABLE;
        else
            PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE;
    }
    else if (HW == true && SW == false && SecondLevel == true)
        PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE;
    else if (HW == true && SW == false && SecondLevel == false)
        PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE;
    else if (HW == false && SW == true)
        PRD_Reason_Code = PRDF_DETECTED_FAIL_SOFTWARE;
    else
    {
        // If we get here both HW and SW flags were false. Callout may be
        // Second Level Support only, or a procedure not checked in the SW
        // flag code.
        PRD_Reason_Code = PRDF_DETECTED_FAIL_HARDWARE_PROBABLE;
    }

    // SRC word 7: primary attention type shifted left by 8 bits, OR'd with the
    // secondary attention type.
    SrcWord7  = io_sdc.getPrimaryAttnType() << 8;
    SrcWord7 |= io_sdc.getSecondaryAttnType();

    //--------------------------------------------------------------------------
    // Check for IPL Diag Mode
    //--------------------------------------------------------------------------

    #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME)

    iplDiagMode = PlatServices::isInMdiaMode();

    #endif

    //**************************************************************
    // Update Error Log with SRC
    //**************************************************************
    ErrorSignature * esig = io_sdc.GetErrorSignature();

    updateSrc( esig->getChipId(), SrcWord7, esig->getSigId(),
               SrcWord9, PRD_Reason_Code);

    //**************************************************************
    //  Add SDC Capture data to Error Log User Data here only if
    //    there are 4 or more callouts,
    //    (including Dimm callouts in the MemoryMru).
    //**************************************************************
    bool capDataAdded = false;
    if (calloutsPlusDimms > 3)
    {
        AddCapData( io_sdc.GetCaptureData(),    iv_errl );
        AddCapData( io_sdc.getTraceArrayData(), iv_errl );
        capDataAdded = true;
    }

    //--------------------------------------------------------------------------
    // Set the error log severity and get the error log action flags.
    //--------------------------------------------------------------------------

    // Let's assume the default is the action for a system checkstop.

    #ifdef __HOSTBOOT_MODULE
    errlSeverity_t errlSev = ERRL_SEV_UNRECOVERABLE;
    #else
    errlSeverity   errlSev = ERRL_SEV_UNRECOVERABLE;
    #endif

    uint32_t errlAct = ERRL_ACTION_SA        | // Service action required.
                       ERRL_ACTION_REPORT    | // Report to HMC and hypervisor.
                       ERRL_ACTION_CALL_HOME;  // Call home.

    if ( MACHINE_CHECK != i_attnType ) // Anything other than a system checkstop
    {
        if ( io_sdc.queryServiceCall() ) // still a serviceable event
        {
            errlSev = ERRL_SEV_PREDICTIVE;
        }
        else // not a serviceable event
        {
            errlSev = io_sdc.queryLogging()
                            ? ERRL_SEV_RECOVERED      // should still be logged
                            : ERRL_SEV_INFORMATIONAL; // can be ignored
            errlAct = ERRL_ACTION_HIDDEN;
        }
    }

    // This needs to be done after setting the SRCs otherwise it will be
    // overridden.
    iv_errl->setSev( errlSev );

    // Add procedure callout for SUE attentions. The intent is to make sure the
    // customer looks for other service actions before replacing parts for this
    // attention.
    if ( io_sdc.IsSUE() )
    {
        PRDF_HW_ADD_PROC_CALLOUT( SUE_PREV_ERR, MRU_HIGH, iv_errl, errlSev );
    }

    //--------------------------------------------------------------------------
    // Get the global gard policy.
    //--------------------------------------------------------------------------

    HWAS::GARD_ErrorType gardPolicy = HWAS::GARD_NULL;

    // Gard only if the error is a serviceable event.
    if ( io_sdc.queryServiceCall() )
    {
        // We will not Resource Recover on a checkstop attention.
        gardPolicy = ( MACHINE_CHECK == i_attnType ) ? HWAS::GARD_Fatal
                                                     : HWAS::GARD_Predictive;
    }

    if ( io_sdc.IsSUE() && ( MACHINE_CHECK == i_attnType ) )
    {
        // If we are logging an error for an SUE consumed, we should not
        // perform any GARD here. Appropriate resources should have already
        // been GARDed for the original UE.
        gardPolicy = HWAS::GARD_NULL;
    }

    // Apply special policies for OPAL.
    if ( isHyprConfigOpal() &&                          // OPAL is used
         !isMfgAvpEnabled() && !isMfgHdatAvpEnabled() ) // No AVPs running
    {
        // OPAL has requested that we disable garding for predictive errors
        // found at runtime.
        if ( HWAS::GARD_Predictive == gardPolicy )
        {
            #if !defined(__HOSTBOOT_MODULE) // FSP only

            if ( isHyprRunning() ) gardPolicy = HWAS::GARD_NULL;

            #elif defined(__HOSTBOOT_RUNTIME) // HBRT only

            gardPolicy = HWAS::GARD_NULL;

            #endif
        }
        // OPAL has requested that we disable garding for fatal errors (system
        // checkstops) that could have been caused by a software generated
        // attention at runtime. This will be determined if there is a software
        // callout with higher priority than a hardware callout.
        else if ( HWAS::GARD_Fatal == gardPolicy &&
                  sappSwNoGardReq && sappHwNoGardReq ) // Gard requirements met
        {
            #if !defined(__HOSTBOOT_MODULE) // FSP only

            if ( isHyprRunning() ) gardPolicy = HWAS::GARD_NULL;

            #elif !defined(__HOSTBOOT_RUNTIME) // Hostboot only

                #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS

                // Checkstop analysis is only done at the beginning of the IPL,
                // regardless if the checkstop actually occurred during the IPL
                // or at runtime. We will need to check the IPL state in FIR
                // data to determine when the checkstop occurred.

                // Get access to IPL state info from the FIR data in the PNOR.
                if ( !(PnorFirDataReader::getPnorFirDataReader().isIplState()) )
                    gardPolicy = HWAS::GARD_NULL;

                #endif

            #endif
        }
    }

    //--------------------------------------------------------------------------
    // Get the global deconfig policy (must be done after setting gard policy).
    //--------------------------------------------------------------------------

    HWAS::DeconfigEnum deconfigPolicy = HWAS::NO_DECONFIG;
    bool               deferDeconfig  = false;

    if ( HWAS::GARD_NULL != gardPolicy )
    {
        #if !defined(__HOSTBOOT_MODULE) // FSP only

        // Change the deconfig state based on the gard type. This is only
        // required to control what the FSP does during the reconfig loop.
        deconfigPolicy = HWSV::SvrError::isInHwReconfLoop() ? HWAS::DECONFIG
                                                            : HWAS::NO_DECONFIG;

        #elif !defined(__HOSTBOOT_RUNTIME) // Hostboot only

        // Deferred Deconfig should be used throughout all of Hostboot (both
        // checkForIplAttns() and MDIA).
        deconfigPolicy = HWAS::DECONFIG;
        deferDeconfig  = true;

        #endif
    }

    //--------------------------------------------------------------------------
    // Get the HCDB diagnostics policy.
    //--------------------------------------------------------------------------

    HWSV::hwsvDiagUpdate l_diagUpdate = HWSV::HWSV_DIAG_NEEDED;
    if ( ERRL_ACTION_HIDDEN == errlAct )
    {
        // Diagnostics is not needed in the next IPL cycle for non-visible logs.
        l_diagUpdate = HWSV::HWSV_DIAG_NOT_NEEDED;
    }

    //--------------------------------------------------------------------------
    // Initialize the PFA data
    //--------------------------------------------------------------------------

    PfaData pfaData;

    initPfaData( io_sdc, i_attnType, deferDeconfig, errlAct, errlSev,
                 gardPolicy, pfaData, o_dumpTrgt );

    //--------------------------------------------------------------------------
    // Add each mru/callout to the error log.
    //--------------------------------------------------------------------------

    for ( SDC_MRU_LIST::const_iterator it = mruList.begin();
          it < mruList.end(); ++it )
    {
        PRDcallout  thiscallout  = it->callout;
        PRDpriority thispriority = it->priority;

        // Use the global gard/deconfig policies as default.
        HWAS::GARD_ErrorType thisGard     = gardPolicy;
        HWAS::DeconfigEnum   thisDeconfig = deconfigPolicy;

        // Change the gard/deconfig actions if this MRU should not be garded.
        if ( NO_GARD == it->gardState )
        {
            thisGard     = HWAS::GARD_NULL;
            thisDeconfig = HWAS::NO_DECONFIG;
        }

        // Add the callout to the PFA data
        addCalloutToPfaData( pfaData, thiscallout, thispriority, thisGard );

        // Add callout based on callout type.
        if( PRDcalloutData::TYPE_TARGET == thiscallout.getType() )
        {
            PRDF_HW_ADD_CALLOUT(thiscallout.getTarget(),
                                thispriority,
                                thisDeconfig,
                                iv_errl,
                                thisGard,
                                errlSev,
                                l_diagUpdate);
        }
        else if(PRDcalloutData::TYPE_PROCCLK == thiscallout.getType() ||
                PRDcalloutData::TYPE_PCICLK  == thiscallout.getType())
        {
            PRDF_ADD_CLOCK_CALLOUT(iv_errl,
                                   thiscallout.getTarget(),
                                   thiscallout.getType(),
                                   thispriority,
                                   thisDeconfig,
                                   thisGard);
        }
        else if ( PRDcalloutData::TYPE_MEMMRU == thiscallout.getType() )
        {
            MemoryMru memMru (thiscallout.flatten());

            // One hardware callout per part in the MemoryMru's callout list.
            // NOTE(review): this inner 'it' shadows the outer MRU list
            // iterator for the duration of the loop.
            TargetHandleList partList = memMru.getCalloutList();
            for ( TargetHandleList::iterator it = partList.begin();
                  it != partList.end(); it++ )
            {
                PRDF_HW_ADD_CALLOUT( *it,
                                     thispriority,
                                     thisDeconfig,
                                     iv_errl,
                                     thisGard,
                                     errlSev,
                                     l_diagUpdate );
            }
        }
        else if ( PRDcalloutData::TYPE_SYMFRU == thiscallout.getType() )
        {
            thisProcedureID = epubProcedureID(thiscallout.flatten());

            PRDF_DTRAC( PRDF_FUNC "thisProcedureID: %x, thispriority: %x, "
                        "errlSev: %x", thisProcedureID, thispriority,errlSev );

            PRDF_HW_ADD_PROC_CALLOUT(thisProcedureID,
                                     thispriority,
                                     iv_errl,
                                     errlSev);

            // Use the flags set earlier to determine if the callout is just
            // Software (SP code or Phyp Code). Add a Second Level Support
            // procedure callout Low, for this case.
            if (HW == false && SW == true && SecondLevel == false)
            {
                PRDF_DTRAC( PRDF_FUNC "thisProcedureID= %x, thispriority=%x, "
                            "errlSev=%x", LEVEL2_SUPPORT, MRU_LOW, errlSev );

                PRDF_HW_ADD_PROC_CALLOUT( LEVEL2_SUPPORT, MRU_LOW, iv_errl,
                                          errlSev );

                // Setting the flag keeps this callout from being added again
                // on a later iteration.
                SecondLevel = true;
            }
        }
    }

    // Send the dynamic memory Dealloc message for DIMMS for Predictive
    // callouts.
    // We can not check for ERRL severity here as there are some cases
    // e.g. DD02 where we create a Predictive error log but callouts
    // are not predictive.
    if ( HWAS::GARD_Predictive == gardPolicy )
    {
        deallocateDimms( mruList );
    }

    //**************************************************************
    // Check for Terminating the system for non mnfg conditions.
    //**************************************************************

    ForceTerminate = checkForceTerm( io_sdc, o_dumpTrgt, pfaData );

    //*************************************************************
    // Check for Manufacturing Mode terminate here and then do
    // the needed overrides on ForceTerminate flag.
    //*************************************************************
    if ( PlatServices::mnfgTerminate() && !ForceTerminate )
    {
        ForceTerminate = true;
        if ( !((errlSev == ERRL_SEV_RECOVERED) ||
               (errlSev == ERRL_SEV_INFORMATIONAL)) &&
             iplDiagMode  &&
             !HW )
        {
            // Terminate in Manufacturing Mode, in IPL mode, for visible log,
            // with no HW callouts.
            PRDF_SRC_WRITE_TERM_STATE_ON(iv_errl, SRCI_TERM_STATE_MNFG);
        }
        // Do not terminate if recoverable or informational.
        // Do not terminate if deferred deconfig.
        else if ( deferDeconfig                            ||
                  (errlSev == ERRL_SEV_RECOVERED    ) ||
                  (errlSev == ERRL_SEV_INFORMATIONAL)  )
        {
            ForceTerminate = false;
            errlAct |= ERRL_ACTION_DONT_TERMINATE;
        }
        else
        {
            PRDF_SRC_WRITE_TERM_STATE_ON(iv_errl, SRCI_TERM_STATE_MNFG);
        }

        // errlAct may have changed above, so refresh the copy in the PFA data.
        pfaData.errlActions = errlAct;
    }

    // Needed to move the errl add user data sections here because of some
    // updates of the data required in the Async section for the SMA dual
    // reporting fix.

    //**************************************************************
    // Add the PFA data to Error Log User Data
    //**************************************************************
    UtilMem l_membuf;
    l_membuf << pfaData;
    PRDF_ADD_FFDC( iv_errl, (const char*)l_membuf.base(), l_membuf.size(),
                   ErrlVer1, ErrlSectPFA5_1 );

    //**************************************************************
    // Add SDC Capture data to Error Log User Data
    //**************************************************************
    // Pulled some code out to incorporate into AddCapData
    // Check to make sure Capture Data wasn't added earlier.
    if (!capDataAdded)
    {
        AddCapData( io_sdc.GetCaptureData(),    iv_errl );
        AddCapData( io_sdc.getTraceArrayData(), iv_errl );
    }

    //**************************************************************************
    // Add extended MemoryMru error log sections (if needed).
    //**************************************************************************

    // The work for each MemoryMru is commented out below, so this loop
    // currently has no effect.
    for ( SDC_MRU_LIST::const_iterator it = mruList.begin();
          it < mruList.end(); ++it )
    {
        // Operate only on MemoryMru callouts.
        if ( PRDcalloutData::TYPE_MEMMRU != it->callout.getType() ) continue;

/* TODO RTC 136125
        // Only add single DIMM callouts. Otherwise, the parsed data is
        // redundant.
        MemoryMru memMru ( it->callout.flatten() );
        if ( !memMru.getSymbol().isValid() ) continue;

        // Add the MemoryMru to the capture data.
        CenMbaCaptureData::addExtMemMruData( memMru, iv_errl );
*/
    }

    //**************************************************************************
    // Additional FFDC
    //**************************************************************************

    // For OP checkstop analysis, add a string indicating a system checkstop
    // occurred and when. This will be printed out in the console traces along
    // with the error log.
    #if defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) // IPL only
    #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS

    if ( MACHINE_CHECK == i_attnType )
    {
        const char * const str =
            PnorFirDataReader::getPnorFirDataReader().isIplState()
                ? "System checkstop occurred during IPL on previous boot"
                : "System checkstop occurred during runtime on previous boot";

        ErrlUserDetailsString(str).addToLog(iv_errl);
    }

    #endif
    #endif

    // Collect PRD traces.
    // NOTE: Each line of a trace is on average 36 bytes so 768 bytes should get
    //       us around 21 lines of trace output.
    PRDF_COLLECT_TRACE(iv_errl, 768);

    //**************************************************************
    // Commit the error log.
    // This will also perform Gard and Deconfig actions.
    // Do the Unit Dumps if needed.
    //**************************************************************

    // Add the MNFG trace information.
    MnfgTrace( io_sdc.GetErrorSignature(), pfaData );

    // If this is not a terminating condition, commit the error log. If the
    // error log is not committed, the error log will be passed back to
    // PRDF::main() and eventually ATTN.
    if ( MACHINE_CHECK != pfaData.priAttnType && !ForceTerminate &&
         !pfaData.TERMINATE )
    {
        // Handle any unit checkstop conditions, if needed (i.e. runtime
        // deconfiguration, dump/FFDC collection, etc.
        if ( io_sdc.IsUnitCS() && !io_sdc.IsUsingSavedSdc() )
        {
            handleUnitCS( io_sdc, o_dumpTrgt, o_initiateHwudump );
        }

        if ( true == o_initiateHwudump )
        {
            // the dump log will be deleted later in PRDF::main
            // after the hwudump is initiated there.
            // The log is handed to the caller through o_dumpErrl, so iv_errl
            // is cleared and this function will return NULL.
            o_dumpErrl = iv_errl;
            iv_errl = NULL;
            o_dumpErrlActions = errlAct;
            PRDF_TRAC( PRDF_FUNC "for target: 0x%08x, i_errl: 0x%08x, "
                       "i_errlActions: 0x%08x", getHuid(o_dumpTrgt),
                       ERRL_GETRC_SAFE(o_dumpErrl), o_dumpErrlActions );
        }
        else
        {
            // Commit the error log.
            commitErrLog( iv_errl, pfaData );
        }
    }

#ifndef __HOSTBOOT_MODULE
    // Read-modify-write of the "prdf/RasServices" registry entry to clear
    // SDC_ANALYSIS_SAVE_FLAG. A failed read skips the write; either failure is
    // traced and its error log is committed.
    errlHndl_t reg_errl = UtilReg::read ("prdf/RasServices", &sdcSaveFlags, sz_uint8);
    if (reg_errl)
    {
        PRDF_ERR( PRDF_FUNC "Failure in SDC Sync flag Registry read" );
        PRDF_COMMIT_ERRL(reg_errl, ERRL_ACTION_REPORT);
    }
    else
    {
        //Turn off indicator that there is saved Sdc Analysis info
        sdcSaveFlags &= ( ~SDC_ANALYSIS_SAVE_FLAG );
        reg_errl = UtilReg::write ("prdf/RasServices", &sdcSaveFlags, sz_uint8);
        if (reg_errl)
        {
            PRDF_ERR( PRDF_FUNC "Failure in SDC Sync flag Registry write" );
            PRDF_COMMIT_ERRL(reg_errl, ERRL_ACTION_REPORT);
        }
    }
#endif

    PRDF_INF( PRDF_FUNC "PRD called to analyze an error: 0x%08x 0x%08x",
              esig->getChipId(), esig->getSigId() );

    // Reset iv_errl to NULL. This is done to catch logical bug in our code.
    // It enables us to assert in createInitialErrl function if iv_errl is
    // not NULL which should catch any logical bug in initial stages of testing.
    errlHndl_t o_errl = iv_errl;
    iv_errl = NULL;

    return o_errl;

    #undef PRDF_FUNC
}