Exemple #1
0
///////////////////////////////////////////////////////////////////////////////
// ErrlManager::errlogMsgHndlr()
//
// Message loop of the error log manager. Blocks on iv_msgQ and dispatches
// every received message on its type:
//
//   ERRLOG_ACCESS_PNOR_TYPE            - calls setupPnorInfo()
//   ERRLOG_ACCESS_TARG_TYPE            - reads ATTR_SP_FUNCTIONS; if base
//                                        services are not available, records
//                                        that and clears MBOX_FLAG on every
//                                        queued log
//   ERRLOG_ACCESS_MBOX_TYPE            - registers iv_msgQ with the mailbox
//                                        and sends queued logs to the FSP, or
//                                        clears MBOX_FLAG on every queued log
//   ERRLOG_ACCESS_IPMI_TYPE            - (CONFIG_BMC_IPMI) sends queued logs
//                                        to the BMC
//   ERRLOG_ACCESS_ERRLDISP_TYPE        - (CONFIG_CONSOLE_OUTPUT_ERRORDISPLAY)
//                                        displays queued logs
//   ERRLOG_NEEDS_TO_BE_COMMITTED_TYPE  - commits a new log and pushes it to
//                                        every consumer that is ready
//   ERRLOG_COMMITTED_ACK_RESPONSE_TYPE - ACKs a log, then retries one pending
//                                        PNOR save
//   ERRLOG_SHUTDOWN_TYPE               - runs errlogShutdown() and responds
//
// Every message is released with msg_free(), except the shutdown message,
// which is answered with msg_respond() instead.
//
// Each queued entry in iv_errlList pairs a log with a set of flags; a flag is
// cleared with _clearFlag() once the matching consumer has been served.
// In the list-walking loops below, _updateErrlListIter() is the only call
// that can move the iterator, so it must advance it (existing comments say it
// also deletes fully processed entries - its body is not visible here).
///////////////////////////////////////////////////////////////////////////////
void ErrlManager::errlogMsgHndlr ()
{
    TRACFCOMP( g_trac_errl, ENTER_MRK "Enter ErrlManager::errlogMsgHndlr" );

    // Service messages forever; every 'break' below only leaves the switch.
    while( 1 )
    {
        // Block until the next message arrives on the error log queue
        msg_t * theMsg = msg_wait( iv_msgQ );
        TRACFCOMP( g_trac_errl, INFO_MRK"Got an error log Msg - Type: 0x%08x",
                                                               theMsg->type );
        //Process message just received
        switch( theMsg->type )
        {
            case ERRLOG_ACCESS_PNOR_TYPE:
                {
                    // PNOR is up and running now.

                    // Locate the error log section in PNOR and save any logs
                    // that were queued while PNOR was unavailable
                    setupPnorInfo();

                    //We are done with the msg
                    msg_free(theMsg);

                    // go back and wait for a next msg
                    break;
                }
            case ERRLOG_ACCESS_TARG_TYPE:
                {
                    // TARGETING is up and running now.

                    // Check the SP_FUNCTIONS attribute of the top level
                    // target: without base services the logs are never sent
                    TARGETING::Target * sys = NULL;
                    TARGETING::targetService().getTopLevelTarget( sys );
                    TARGETING::SpFunctions spfn;

                    // True when there is no top level target, the attribute
                    // cannot be read, or baseServices is not set
                    if (!(sys &&
                          sys->tryGetAttr<TARGETING::ATTR_SP_FUNCTIONS>(spfn) &&
                          spfn.baseServices))
                    {
                        iv_isSpBaseServices = false;

                        // if there are queued errors, clear the Mbox flag
                        // since they will never be sent, which will delete
                        // the errors that have been fully processed
                        ErrlListItr_t it = iv_errlList.begin();
                        while(it != iv_errlList.end())
                        {
                            // Mark MBOX processing complete
                            _clearFlag(*it, MBOX_FLAG);
                            // Moves 'it' on to the next entry
                            _updateErrlListIter(it);
                        }
                    }

                    //We are done with the msg
                    msg_free(theMsg);

                    // go back and wait for a next msg
                    break;
                }
            case ERRLOG_ACCESS_MBOX_TYPE:
                {
                    // MBOX is up and running now.

                    // do we need to send the errorlog
                    TARGETING::Target * sys = NULL;
                    TARGETING::targetService().getTopLevelTarget( sys );
                    TARGETING::SpFunctions spfn;

                    // The mailbox is only usable when the attribute can be
                    // read and says so; otherwise the flag is left unchanged
                    if (sys &&
                        sys->tryGetAttr<TARGETING::ATTR_SP_FUNCTIONS>(spfn) &&
                        spfn.mailboxEnabled)
                    {
                        iv_isMboxEnabled = true;
                    }

                    // if we're supposed to and can now send msgs, do it.
                    if (iv_isSpBaseServices && iv_isMboxEnabled)
                    {
                        // Register messageQ with Mailbox to receive message.
                        errlHndl_t l_err =
                                MBOX::msgq_register( MBOX::HB_ERROR_MSGQ,
                                                    iv_msgQ );
                        if( l_err )
                        {
                            TRACFCOMP(g_trac_errl, ERR_MRK "Msg queue already registered");

                            // The returned log is discarded, not committed
                            delete( l_err );
                            l_err = NULL;

                            //If we got an error then it means the message queue
                            //is registered with mailbox.
                            //This should not happen. So assert here.
                            assert(0);
                        }

                        // if errors came in before MBOX was ready,
                        // the errors would be on this list. send them now.
                        ErrlListItr_t it = iv_errlList.begin();
                        while(it != iv_errlList.end())
                        {
                            // Check if MBOX processing is needed
                            if (_isFlagSet(*it, MBOX_FLAG))
                            {
                                // send errlog
                                sendErrLogToFSP(it->first);
                                // Mark MBOX processing complete
                                _clearFlag(*it, MBOX_FLAG);
                            }
                            // Moves 'it' on to the next entry
                            _updateErrlListIter(it);
                        }
                    }
                    else
                    {
                        // Nothing will be sent over the mailbox, so drop the
                        // MBOX requirement from every queued error
                        ErrlListItr_t it = iv_errlList.begin();
                        while(it != iv_errlList.end())
                        {
                            // Mark MBOX processing complete
                            _clearFlag(*it, MBOX_FLAG);
                            // Moves 'it' on to the next entry
                            _updateErrlListIter(it);
                        }
                    }

                    //We are done with the msg
                    msg_free(theMsg);

                    // go back and wait for a next msg
                    break;
                }
            case ERRLOG_ACCESS_IPMI_TYPE:
                {
                    // Without CONFIG_BMC_IPMI this case only frees the msg
#ifdef CONFIG_BMC_IPMI
                    // IPMI is up and running now.
                    iv_isIpmiEnabled = true;

                    // if we can now send msgs, do it.
                    // if errors came in before IPMI was ready,
                    // the errors would be on this list. send them now.
                    ErrlListItr_t it = iv_errlList.begin();
                    while(it != iv_errlList.end())
                    {
                        // Check if IPMI processing is needed
                        if (_isFlagSet(*it, IPMI_FLAG))
                        {
                            // send errorlog
                            sendErrLogToBmc(it->first);
                            // Mark IPMI processing complete
                            _clearFlag(*it, IPMI_FLAG);
                        }
                        // Moves 'it' on to the next entry
                        _updateErrlListIter(it);
                    }
#endif

                    //We are done with the msg
                    msg_free(theMsg);

                    // go back and wait for a next msg
                    break;
                }
            case ERRLOG_ACCESS_ERRLDISP_TYPE:
                {
                    // Without CONFIG_CONSOLE_OUTPUT_ERRORDISPLAY this case
                    // only frees the msg
#ifdef CONFIG_CONSOLE_OUTPUT_ERRORDISPLAY
                    // Errldisplay now ready
                    iv_isErrlDisplayEnabled = true;

                    CONSOLE::displayf("ERRL",
                        "Dumping errors reported prior to registration");

                    // Display errlogs to errldisplay
                    ErrlListItr_t it = iv_errlList.begin();
                    while(it != iv_errlList.end())
                    {
                        // Check if ERRLDISP processing is needed
                        if (_isFlagSet(*it, ERRLDISP_FLAG))
                        {
                            // Second argument is the reason code masked with
                            // 0xFF00 (presumably the component id - confirm
                            // against msgDisplay's declaration)
                            ERRORLOGDISPLAY::errLogDisplay().msgDisplay
                                        (it->first,
                                        ((it->first->reasonCode()) & 0xFF00));
                            // Mark ERRLDISP processing complete
                            _clearFlag(*it, ERRLDISP_FLAG);
                        }
                        // Moves 'it' on to the next entry
                        _updateErrlListIter(it);
                    }
#endif
                    //We are done with the msg
                    msg_free(theMsg);

                    break;
                }
            case ERRLOG_NEEDS_TO_BE_COMMITTED_TYPE:
                {
                    // Extract error log handle from the message. We need the
                    // error log handle to pass along
                    // NOTE(review): the handle is used without a NULL check;
                    // senders are assumed to always provide one - confirm.
                    errlHndl_t l_err = (errlHndl_t) theMsg->extra_data;

                    // Ask the ErrlEntry to assign commit component, commit time
                    // (the committing component id travels in data[0])
                    l_err->commit( (compId_t) theMsg->data[0] );

                    // Pair with all flags set to add to the errlList; each
                    // consumer served below clears its own flag
                    ErrlFlagPair_t l_pair(l_err, ALL_FLAGS);

#ifdef CONFIG_CONSOLE_OUTPUT_ERRORDISPLAY
                    // Display errl to errldisplay
                    if (iv_isErrlDisplayEnabled)
                    {
                        ERRORLOGDISPLAY::errLogDisplay().msgDisplay
                                            (l_err,
                                            ( (l_err->reasonCode()) & 0xFF00));
                        // Mark ERRLDISP processing complete on this error
                        _clearFlag(l_pair, ERRLDISP_FLAG);
                    }
#endif
                    //Save the error log to PNOR
                    bool l_savedToPnor = saveErrLogToPnor(l_err);

                    // Check if we actually saved the msg to PNOR
                    if (l_savedToPnor)
                    {
                        // Mark PNOR processing complete on this error
                        _clearFlag(l_pair, PNOR_FLAG);
                    }

#ifdef STORE_ERRL_IN_L3
                    //Write the error log to L3 memory
                    //useful ONLY for the hb-errl tool
                    saveErrLogEntry ( l_err );
#endif

                    //Try to send the error log if someone is there to receive
                    if (!iv_isSpBaseServices)
                    {
                        // Nobody to send to, so nothing is pending for MBOX
                        // Mark MBOX processing complete on this error
                        _clearFlag(l_pair, MBOX_FLAG);
                    }
                    else if (iv_isSpBaseServices && iv_isMboxEnabled)
                    {
                        sendErrLogToFSP(l_err);

                        // Mark MBOX processing complete on this error
                        _clearFlag(l_pair, MBOX_FLAG);
                    }
                    // else: base services exist but the mailbox is not
                    // enabled yet, so MBOX_FLAG stays set and the log is
                    // queued below

#ifdef CONFIG_BMC_IPMI
                    if (iv_isIpmiEnabled)
                    {
                        // convert to SEL/eSEL and send to BMC over IPMI
                        sendErrLogToBmc(l_err);

                        // Mark IPMI processing complete on this error
                        _clearFlag(l_pair, IPMI_FLAG);
                    }
#endif

                    //Ask the ErrlEntry to process any callouts
                    l_err->processCallout();

                    //Ask if it is a terminating log
                    if( l_err->isTerminateLog() )
                    {
                        TRACFCOMP( g_trac_errl, INFO_MRK
                                   "Terminating error was committed"
                                   " errlmanager is reqesting a shutdown.");

                        // Request a shutdown, passing the log's PLID
                        INITSERVICE::doShutdown(l_err->plid(), true);

                        TRACDCOMP( g_trac_errl,
                                INFO_MRK"shutdown in progress" );
                    }

                    // If l_err has been fully processed (no flags left set)
                    // delete it, otherwise add it to the list so the
                    // remaining consumers can be served later
                    if (l_pair.second == 0)
                    {
                        delete l_err;
                        l_err = NULL;
                    }
                    else
                    {
                        iv_errlList.push_back(l_pair);
                    }

                    //We are done with the msg
                    msg_free(theMsg);

                    // go back and wait for a next msg
                    break;
                }
            case ERRLOG_COMMITTED_ACK_RESPONSE_TYPE:
                {
                    //Hostboot must keep track and clean up hostboot error
                    //logs in PNOR after it is committed by FSP.
                    // The PLID is taken from the upper 32 bits of data[0]
                    uint32_t l_tmpPlid = theMsg->data[0]>>32;
                    TRACFCOMP( g_trac_errl, INFO_MRK"ack: %.8x", l_tmpPlid);

                    bool didAck = ackErrLogInPnor(l_tmpPlid);
                    if (!didAck)
                    {
                        // couldn't find that errlog in PNOR, look in our
                        // errlMsgList - maybe it's there waiting
                        ErrlListItr_t it = std::find_if(iv_errlList.begin(),
                                        iv_errlList.end(),
                                        std::bind1st(ptr_fun(&compareEidToPlid)
                                                             ,l_tmpPlid));
                        // Check if such errl was found
                        if (it != iv_errlList.end())
                        {
                            // We found the errlog
                            // Mark PNOR processing complete
                            _clearFlag(*it, PNOR_FLAG);
                            _updateErrlListIter(it);
                        }
                    }

                    // The message content is no longer needed past this point
                    msg_free(theMsg);

                    // We didn't have room before in PNOR to save an
                    // error log, so try now since we just ACKed one.
                    // Only the first queued log still flagged for PNOR is
                    // retried per ACK.
                    ErrlListItr_t it = std::find_if(iv_errlList.begin(),
                                        iv_errlList.end(),
                                        bind2nd(ptr_fun(_isFlagSet),
                                        PNOR_FLAG));

                    // Check if such errl was found
                    if (it != iv_errlList.end())
                    {
                        bool l_savedToPnor = saveErrLogToPnor(it->first);

                        // check if we actually saved the msg to PNOR
                        if (l_savedToPnor)
                        {
                            // Mark PNOR processing complete
                            _clearFlag(*it, PNOR_FLAG);
                            _updateErrlListIter(it);
                        }
                        // else, still couldn't save it (for some reason) so
                        // it's still on the list.
                    }
                    break;
                }
            case ERRLOG_SHUTDOWN_TYPE:
                TRACFCOMP( g_trac_errl, INFO_MRK "Shutdown event received" );

                //Start shutdown process for error log
                errlogShutdown();

                // Respond that we are done shutting down.
                // The message is answered rather than freed here.
                msg_respond ( iv_msgQ, theMsg );

                TRACFCOMP( g_trac_errl, INFO_MRK "Shutdown event processed" );

                break;

            default:
                // Unknown message type: trace it and drop the message
                TRACFCOMP( g_trac_errl, ERR_MRK "Unexpected message type 0x%08x",
                                                                  theMsg->type );

                msg_free(theMsg);
                break;
        } // switch
    }

    //The errlogMsgHndlr should run all the time. It only
    //exits when error log message thread is killed.
    // NOTE(review): the loop above has no path that leaves it, so the
    // statements below are not reached in normal execution.
    TRACFCOMP( g_trac_errl, EXIT_MRK "Exit ErrlManager::errlogMsgHndlr" );
    return;
}
Exemple #2
0
// ------------------------------------------------------------------
// setupPnorInfo
//
// Initializes the PNOR bookkeeping of the error log manager:
//   1. Queries the HB_ERRLOGS PNOR section and records its address
//      (iv_pnorAddr) and the number of log slots it holds
//      (iv_maxErrlInPnor).
//   2. Walks every slot to find the highest log id already stored and
//      points iv_pnorOpenSlot at that slot.
//   3. For each stored log that was never ACKed: on CONFIG_BMC_IPMI
//      builds the log is unflattened and either sent to the BMC or
//      queued on iv_errlList with IPMI_FLAG; the slot is then marked
//      ACKed.
//   4. Moves iv_currLogId to one past the highest id found.
//   5. Saves queued logs still flagged for PNOR, stopping at the first
//      one that cannot be saved.
//
// Takes no parameters and returns nothing; asserts if the section info
// cannot be obtained.
// ------------------------------------------------------------------
void ErrlManager::setupPnorInfo()
{
    TRACFCOMP( g_trac_errl, ENTER_MRK"setupPnorInfo" );

    do
    {
        // Get the HB_ERRLOGS PNOR section info from PNOR RP
        PNOR::SectionInfo_t info;
        errlHndl_t err = PNOR::getSectionInfo( PNOR::HB_ERRLOGS, info );

        if (err)
        {
            TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo getSectionInfo failed");
            assert(err == NULL);
            break;
        }

        TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo sectionInfo id %d name \"%s\" size %d",
                info.id, info.name, info.size );

        // Set the globals appropriately
        iv_pnorAddr = reinterpret_cast<char *> (info.vaddr);
        iv_maxErrlInPnor = info.size / PNOR_ERROR_LENGTH;

        TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo iv_pnorAddr %p maxErrlInPnor %d",
                iv_pnorAddr, iv_maxErrlInPnor );

        // initial value, in case PNOR is empty - start at this end slot
        // so that our first save will increment and wrap correctly
        iv_pnorOpenSlot = (iv_maxErrlInPnor - 1);

        // walk thru memory, finding error logs and determine the highest ID
        uint32_t l_maxId = 0;
        for (uint32_t i = 0; i < iv_maxErrlInPnor; i++)
        {
            if (!isSlotEmpty(i))
            {
                uint32_t l_id = readEidFromFlattened(i);
                // If id is not from HB (0x9XXXXXXX) grab plid instead
                if ( (l_id & FIRST_BYTE_ERRLOG) != ERRLOG_PLID_BASE )
                {
                    l_id = readPlidFromFlattened(i);
                }
                if (l_id > l_maxId )
                {
                    l_maxId = l_id;

                    // set this - start at this 'max' slot so that our first
                    // save will increment correctly
                    iv_pnorOpenSlot = i;
                }
                // also check if it's ACKed or not
                if (!isSlotACKed(i))
                {
                    TRACFCOMP( g_trac_errl,
                        INFO_MRK"setupPnorInfo slot %d eid %.8X was not ACKed.",
                        i, l_id);

#ifdef CONFIG_BMC_IPMI
                    // for IPMI systems, unflatten to send down to the BMC
                    err = new ERRORLOG::ErrlEntry(
                            ERRORLOG::ERRL_SEV_UNRECOVERABLE, 0,0);
                    char *l_errlAddr = iv_pnorAddr + (PNOR_ERROR_LENGTH * i);
                    uint64_t rc = err->unflatten(l_errlAddr, PNOR_ERROR_LENGTH);
                    if (rc != 0)
                    {
                        // unflatten didn't work, nothing we can do
                        TRACFCOMP( g_trac_errl,
                            ERR_MRK"setupPnorInfo unflatten failed on slot %d eid %.8X.",
                            i, l_id);

                        // The entry allocated above is neither sent nor
                        // queued on this path, so release it here; it
                        // would otherwise leak once per unreadable slot
                        delete err;
                        err = NULL;
                    }
                    else
                    {
                        if (iv_isIpmiEnabled)
                        {
                            // convert to SEL/eSEL and send to BMC over IPMI
                            sendErrLogToBmc(err);
                            delete err;
                            // don't keep a dangling handle around
                            err = NULL;
                        }
                        else
                        {
                            TRACFCOMP( g_trac_errl,
                                INFO_MRK"setupPnorInfo pushing slot %d eid %.8X to iv_errList.",
                                i, l_id);
                            // Pair with IPMI flag to add to the errlList
                            // so that it'll get sent down when IPMI is up
                            // (the list now holds the entry, so it is not
                            // deleted here)
                            ErrlFlagPair_t l_pair(err, IPMI_FLAG);
                            iv_errlList.push_back(l_pair);
                        }
                    }
#else
                    // for FSP system, this shouldn't ever happen.
#endif
                    // The slot is marked ACKed whether or not the log could
                    // be unflattened or sent
                    setACKInFlattened(i);
                } // not ACKed
            } // not empty
        } // for

        // bump the current eid to 1 past the max eid found
        // (retry until the compare-and-swap on iv_currLogId succeeds)
        while (!__sync_bool_compare_and_swap(&iv_currLogId, iv_currLogId,
                    (iv_currLogId & ERRLOG_PLID_BASE_MASK) +
                    (l_maxId & ERRLOG_PLID_MASK) + 1));
        TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo reseting LogId 0x%X", iv_currLogId);

        // if error(s) came in before PNOR was ready,
        // the error log(s) would be on this list. save now.
        ErrlListItr_t it = iv_errlList.begin();
        while(it != iv_errlList.end())
        {
            // Check if PNOR processing is needed
            if (_isFlagSet(*it, PNOR_FLAG))
            {
                // Try to write this queued log into PNOR
                bool l_savedToPnor = saveErrLogToPnor(it->first);

                // check if we actually saved the msg to PNOR
                if (l_savedToPnor)
                {
                    // Mark PNOR processing complete
                    _clearFlag(*it, PNOR_FLAG);
                    // Moves 'it' on to the next entry (this branch has no
                    // other iterator advance)
                    _updateErrlListIter(it);
                }
                else
                {
                    // still couldn't save it (PNOR maybe full) so
                    // it's still on the list.
                    break; // get out of this while loop.
                }
            }
            else
            {
                ++it;
            }
        }
    } while (0);

    TRACFCOMP( g_trac_errl, EXIT_MRK"setupPnorInfo");
} // setupPnorInfo
Exemple #3
0
// ------------------------------------------------------------------
// setupPnorInfo
// ------------------------------------------------------------------
void ErrlManager::setupPnorInfo()
{
    TRACFCOMP( g_trac_errl, ENTER_MRK"setupPnorInfo" );

    do
    {
        // Get SPD PNOR section info from PNOR RP
        PNOR::SectionInfo_t info;
        errlHndl_t err = PNOR::getSectionInfo( PNOR::HB_ERRLOGS, info );

        if (err)
        {
            TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo getSectionInfo failed");
            assert(err == NULL);
            break;
        }

        TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo sectionInfo id %d name \"%s\" size %d",
                info.id, info.name, info.size );

        // Set the globals appropriately
        iv_pnorAddr = reinterpret_cast<char *> (info.vaddr);
        iv_maxErrlInPnor = info.size / PNOR_ERROR_LENGTH;

        TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo iv_pnorAddr %p maxErrlInPnor %d",
                iv_pnorAddr, iv_maxErrlInPnor );

        // initial value, in case PNOR is empty - start at this end slot
        // so that our first save will increment and wrap correctly
        iv_pnorOpenSlot = (iv_maxErrlInPnor - 1);

        // walk thru memory, finding error logs and determine the highest ID
        uint32_t l_maxId = 0;
        for (uint32_t i = 0; i < iv_maxErrlInPnor; i++)
        {
            if (!isSlotEmpty(i))
            {
                uint32_t l_id = readEidFromFlattened(i);
                // If id is not from HB (0x9XXXXXXX) grab plid instead
                if ( (l_id & FIRST_BYTE_ERRLOG) != ERRLOG_PLID_BASE )
                {
                    l_id = readPlidFromFlattened(i);
                }
                if (l_id > l_maxId )
                {
                    l_maxId = l_id;

                    // set this - start at this 'max' slot so that our first
                    // save will increment correctly
                    iv_pnorOpenSlot = i;
                }
                // also check if it's ACKed or not. and ACK it.
                // for FSP system, this shouldn't ever happen.
                // for non-FSP systems, this clears out all 'last IPL' logs
                if (!isSlotACKed(i))
                {
                    TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo slot %d eid %.8X was not ACKed.",
                        i, l_id);
                    setACKInFlattened(i);
                } // not ACKed
            } // not empty
        } // for

        // bump the current eid to 1 past the max eid found
        while (!__sync_bool_compare_and_swap(&iv_currLogId, iv_currLogId,
                    (iv_currLogId & ERRLOG_PLID_BASE_MASK) +
                    (l_maxId & ERRLOG_PLID_MASK) + 1));
        TRACFCOMP( g_trac_errl, INFO_MRK"setupPnorInfo reseting LogId 0x%X", iv_currLogId);

        // if error(s) came in before PNOR was ready,
        // the error log(s) would be on this list. save now.
        ErrlListItr_t it = iv_errlList.begin();
        while(it != iv_errlList.end())
        {
            // Check if PNOR processing is needed
            if (_isPnorFlagSet(*it))
            {
                //ACK it if no one is there to receive
                bool l_savedToPnor = saveErrLogToPnor(it->first);

                // check if we actually saved the msg to PNOR
                if (l_savedToPnor)
                {
                    // Mark PNOR processing complete
                    _clearPnorFlag(*it);
                    _updateErrlListIter(it);
                }
                else
                {
                    // still couldn't save it (PNOR maybe full) so
                    // it's still on the list.
                    break; // get out of this while loop.
                }
            }
            else
            {
                ++it;
            }
        }
    } while (0);

    TRACFCOMP( g_trac_errl, EXIT_MRK"setupPnorInfo");
} // setupPnorInfo