Ejemplo n.º 1
0
    // Create/Build an Error log and add HTMGT component trace
    /**
     * @brief Create a new HTMGT error log, or extend an existing one.
     *
     * When io_err is NULL a new ErrlEntry is allocated: i_data1/i_data2 are
     * packed into the first 64-bit user data word and i_data3/i_data4 into
     * the second.  When io_err already refers to a log, the same values are
     * appended to it as a SUBSEC_ADDITIONAL_SRC FFDC section instead.
     * The "HTMGT" component trace is collected in both cases.
     *
     * @param[in,out] io_err          Log to create (if NULL) or to extend
     * @param[in]     i_modid         Module id for the log
     * @param[in]     i_rc            Reason code for the log
     * @param[in]     i_data1         Upper 32 bits of the first user data word
     * @param[in]     i_data2         Lower 32 bits of the first user data word
     * @param[in]     i_data3         Upper 32 bits of the second user data word
     * @param[in]     i_data4         Lower 32 bits of the second user data word
     * @param[in]     i_sev           Severity passed to the new log
     * @param[in]     i_addFwCallout  Passed through to the ErrlEntry
     *                                constructor (traced as fw:y/n)
     */
    void bldErrLog(errlHndl_t &   io_err,
                   const uint8_t  i_modid,
                   const uint16_t i_rc,
                   const uint32_t i_data1,
                   const uint32_t i_data2,
                   const uint32_t i_data3,
                   const uint32_t i_data4,
                   const ERRORLOG::errlSeverity_t i_sev,
                   const bool i_addFwCallout)
    {
        TMGT_INF("bldErrLog(mod: 0x%02X, rc: 0x%02X, data: 0x%08X %08X %08X"
                 " %08X, sev: 0x%02X, fw:%c",
                 i_modid, i_rc, i_data1, i_data2, i_data3, i_data4,
                 i_sev, i_addFwCallout?'y':'n');
        // TODO RTC 124739 - RAS review what logs need fw callout

        if (NULL != io_err)
        {
            // A log already exists: keep it and attach this failure to it.
            // TODO RTC 124739
            // - collectTrace will not filter dup traces and no way to clear
            // - no way to add secondary SRC to elog
            io_err->collectTrace("HTMGT");

            // Same information a new log would carry, flattened to words
            uint32_t secondarySrc[] =
            {
                uint32_t(HTMGT_COMP_ID | i_rc),
                uint32_t(i_modid),
                uint32_t(i_sev),
                uint32_t(i_addFwCallout?1:0),
                i_data1,
                i_data2,
                i_data3,
                i_data4
            };
            io_err->addFFDC(HTMGT_COMP_ID,
                            secondarySrc,
                            sizeof(secondarySrc),
                            1,  // version
                            SUBSEC_ADDITIONAL_SRC);
            return;
        }

        // No log yet: build one with the data packed into two 64-bit words
        const uint64_t userData1 = ((uint64_t)i_data1 << 32) | i_data2;
        const uint64_t userData2 = ((uint64_t)i_data3 << 32) | i_data4;
        io_err = new ERRORLOG::ErrlEntry(i_sev,
                                         i_modid,
                                         i_rc,
                                         userData1,
                                         userData2,
                                         i_addFwCallout);
        io_err->collectTrace("HTMGT");
    }
Ejemplo n.º 2
0
    // Move the OCCs to active state or log unrecoverable error and
    // stay in safe mode
    /**
     * @brief Handle the result of the OCC load/start step.
     *
     * If the start completed, the OCC objects are built and (when a master
     * OCC exists) communication is established, config data and the user
     * power cap are sent, and the OCCs are moved to their target state.
     * If any mandatory step fails, or the start did not complete, an error
     * log is held in l_err and an OCC reset is attempted; the held log is
     * then committed as unrecoverable (reset failed) or informational
     * (reset request succeeded).
     *
     * @param[in] i_startCompleted   true if all OCCs were loaded/started
     * @param[in] i_failedOccTarget  OCC target that failed, or NULL; only
     *                               its HUID is used (trace and user data)
     */
    void processOccStartStatus(const bool i_startCompleted,
                               TARGETING::Target * i_failedOccTarget)
    {
        TMGT_INF(">>processOccStartStatus(%d,0x%p)",
                 i_startCompleted, i_failedOccTarget);
        errlHndl_t l_err = NULL;
        // HUID stays 0 when no failing target was supplied
        uint32_t l_huid = 0;
        if (i_failedOccTarget)
        {
            l_huid = TARGETING::get_huid(i_failedOccTarget);
        }
        TMGT_INF("processOccStartStatus(Start Success=%c, failedOcc=0x%08X)",
                 i_startCompleted?'y':'n', l_huid);
        if (i_startCompleted)
        {
            // Query functional OCCs
            l_err = OccManager::buildOccs();
            if (NULL == l_err)
            {
                if (NULL != OccManager::getMasterOcc())
                {
                    // Single-pass loop: 'break' leaves with l_err set so the
                    // reset handling at the bottom of the function runs
                    do
                    {
#ifndef __HOSTBOOT_RUNTIME
                        // Build pstate tables (once per IPL)
                        l_err = genPstateTables();
                        if(l_err)
                        {
                            break;
                        }

                        // Calc memory throttles (once per IPL)
                        calcMemThrottles();
#endif

                        // Make sure OCCs are ready for communication
                        OccManager::waitForOccCheckpoint();

#ifdef __HOSTBOOT_RUNTIME
                        // TODO RTC 124738  Final solution TBD
                        //  Perhaps POLL scom 0x6a214 until bit 31 is set?
                        nanosleep(1,0);
#endif

                        // Send poll to establish comm
                        TMGT_INF("Send initial poll to all OCCs to"
                                 " establish comm");
                        l_err = OccManager::sendOccPoll();
                        if (l_err)
                        {
                            // Continue even if failed (poll will be retried)
                            ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
                        }

                        // Send ALL config data
                        sendOccConfigData();

                        // Set the User PCAP
                        l_err = sendOccUserPowerCap();
                        if (l_err)
                        {
                            break;
                        }

                        // Wait for all OCCs to go to the target state
                        l_err = waitForOccState();
                        if ( l_err )
                        {
                            break;
                        }

                        // Set active sensors for all OCCs,
                        // so BMC can start communication with OCCs
                        l_err = setOccActiveSensors(true);
                        if (l_err)
                        {
                            // Continue even if failed to update sensor
                            ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
                        }

                    } while(0);
                }
                else
                {
                    TMGT_ERR("Unable to find any Master capable OCCs");
                    /*@
                     * @errortype
                     * @reasoncode      HTMGT_RC_OCC_MASTER_NOT_FOUND
                     * @moduleid        HTMGT_MOD_LOAD_START_STATUS
                     * @userdata1[0:7]  number of OCCs
                     * @devdesc         No OCC master was found
                     */
                    // Created as informational; the severity is finalized
                    // after the reset attempt below
                    bldErrLog(l_err, HTMGT_MOD_LOAD_START_STATUS,
                              HTMGT_RC_OCC_MASTER_NOT_FOUND,
                              OccManager::getNumOccs(), 0, 0, 0,
                              ERRORLOG::ERRL_SEV_INFORMATIONAL);
                }
            }
            else
            {
                // Failed to find functional OCCs, no need to try again
                // Set original error log as unrecoverable and commit
                // NOTE(review): errlCommit presumably clears l_err (other
                // HTMGT code notes it "sets errl to NULL"), which is what
                // keeps this path out of the reset handling below - confirm
                l_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
                ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
            }
        }
        else
        {
            TMGT_ERR("All OCCs were not loaded/started successfully");
            /*@
             * @errortype
             * @reasoncode      HTMGT_RC_OCC_START_FAIL
             * @moduleid        HTMGT_MOD_LOAD_START_STATUS
             * @userdata1       Failing OCC HUID
             * @devdesc         OCCs were not loaded/started successfully
             */
            bldErrLog(l_err, HTMGT_MOD_LOAD_START_STATUS,
                      HTMGT_RC_OCC_START_FAIL,
                      l_huid, 0, 0, 0,
                      ERRORLOG::ERRL_SEV_INFORMATIONAL);
        }

        // A log still held here means the OCCs did not all reach the target
        // state: attempt a reset, then commit the held log with a severity
        // that reflects whether the reset request itself failed
        if (NULL != l_err)
        {
            TMGT_ERR("OCCs not all active.  Attempting OCC Reset");
            TMGT_CONSOLE("OCCs are not active (rc=0x%04X). "
                         "Attempting OCC Reset",
                         l_err->reasonCode());
            TMGT_INF("Calling resetOccs");
            errlHndl_t err2 = OccManager::resetOccs(NULL);
            if(err2)
            {
                TMGT_ERR("OccManager::resetOccs failed with 0x%04X",
                         err2->reasonCode());

                // Set original error log as unrecoverable and commit
                l_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE);
                ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);

                // Commit occReset error
                ERRORLOG::errlCommit(err2, HTMGT_COMP_ID);
            }
            else
            {
                // retry worked - commit original error as informational
                l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL);
                ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID);
            }
        }
        TMGT_INF("<<processOccStartStatus()");

    } // end processOccStartStatus()
Ejemplo n.º 3
0
    // Set the OCC state
    /**
     * @brief Move all OCCs to the active or observation state.
     *
     * Nothing is sent to the OCCs when the system is already in safe mode.
     * If the state change leaves an OCC needing a reset, the reset is
     * started here and safe mode is re-read afterwards.  Whenever no error
     * log is pending but the system is in safe mode, an unrecoverable log
     * is created so the caller knows the request was not honored.
     *
     * @param[in] i_occActivation  true: request OCC_STATE_ACTIVE,
     *                             false: request OCC_STATE_OBSERVATION
     * @return NULL on success, otherwise the error log describing the
     *         failure (returned to the caller, not committed here)
     */
    errlHndl_t enableOccActuation(bool i_occActivation)
    {
        TMGT_INF(">>enableOccActuation(%c)", i_occActivation?'Y':'N');

        // Read the safe mode attribute from the top level target.
        // If the system is already in safemode then can't talk to OCCs
        TARGETING::Target* topLevel = NULL;
        TARGETING::targetService().getTopLevelTarget(topLevel);
        uint8_t inSafeMode = 0;
        if (NULL != topLevel)
        {
            topLevel->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(inSafeMode);
        }

        errlHndl_t elog = NULL;
        if (!inSafeMode)
        {
            occStateId requestedState = i_occActivation
                                        ? OCC_STATE_ACTIVE
                                        : OCC_STATE_OBSERVATION;

            // Set state for all OCCs
            elog = OccManager::setOccState(requestedState);
            if (!elog)
            {
                TMGT_INF("enableOccActuation: OCC states updated to 0x%02X",
                         requestedState);
            }

            if (OccManager::occNeedsReset())
            {
                // Commit setOccState elog (if any) since OCCs will be reset
                // and recovery attempted.
                if (NULL != elog)
                {
                    ERRORLOG::errlCommit(elog, HTMGT_COMP_ID);
                }

                TMGT_ERR("enableOccActuation(): OCCs need to be reset");
                // Don't pass failed target as OCC should have already
                // been marked as failed during the poll.
                elog = OccManager::resetOccs(NULL);

                // NOTE: If the system exceeded its reset count and ended up
                // in safe mode an error may not be returned here (if a
                // failure happened after the first reset attempt).
                // This is because the resets are recursive:
                //   HTMGT calls back into HBRT to initiate the reset, then
                //   HBRT calls into HTMGT when reset completed
                // To detect this condition we need to check for safe mode
                // after the recovery attempts and return error if in safe.
                if (NULL != topLevel)
                {
                    topLevel->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>
                        (inSafeMode);
                }
            }
        }

        if (inSafeMode && (NULL == elog))
        {
            // Create an elog so the user knows the cmd failed.
            TMGT_ERR("enableOccActuation(): System is in safe mode");
            /*@
             * @errortype
             * @reasoncode      HTMGT_RC_OCC_CRIT_FAILURE
             * @moduleid        HTMGT_MOD_ENABLE_OCC_ACTUATION
             * @userdata1       OCC activate [1==true][0==false]
             * @devdesc         Operation not allowed, system is in safe mode
             */
            bldErrLog(elog,
                      HTMGT_MOD_ENABLE_OCC_ACTUATION,
                      HTMGT_RC_OCC_CRIT_FAILURE,
                      0, i_occActivation, 0, inSafeMode,
                      ERRORLOG::ERRL_SEV_UNRECOVERABLE);
        }

        TMGT_INF("<<enableOccActuation() returning 0x%04X",
                 (elog==NULL) ? 0 : elog->reasonCode());
        return elog;

    } // end enableOccActuation()
Ejemplo n.º 4
0
    // Notify HTMGT that an OCC has failed and needs to be reset
    /**
     * @brief Reset the OCCs after a failure was reported for a processor.
     *
     * The request is ignored when the system is already in safe mode.
     * Otherwise the first OCC child of i_proc is passed to
     * OccManager::resetOccs() as the failed target.  If no OCC can be found
     * for the processor, an informational log with a firmware procedure
     * callout is committed and the reset is still attempted with a NULL
     * failed target.
     *
     * @param[in] i_proc  Processor target whose OCC failed.  A NULL pointer
     *                    is treated like a processor with no OCC (HUID is
     *                    reported as 0).
     */
    void processOccReset(TARGETING::Target * i_proc)
    {
        TMGT_INF(">>processOccReset(0x%p)", i_proc);
        errlHndl_t errl = NULL;
        TARGETING::Target * failedOccTarget = NULL;

        TARGETING::Target* sys = NULL;
        TARGETING::targetService().getTopLevelTarget(sys);
        uint8_t safeMode = 0;

        // If the system is in safemode then ignore request to reset OCCs
        if(sys &&
           sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) &&
           safeMode)
        {
            return;
        }

        // Get functional OCC (one per proc).  Skip the lookup for a NULL
        // processor so the target is never dereferenced below.
        if (NULL != i_proc)
        {
            TARGETING::TargetHandleList pOccs;
            getChildChiplets(pOccs, i_proc, TARGETING::TYPE_OCC);
            if (pOccs.size() > 0)
            {
                failedOccTarget = pOccs[0];
            }
        }

        if(NULL != failedOccTarget)
        {
            uint32_t huid = failedOccTarget->getAttr<TARGETING::ATTR_HUID>();
            TMGT_INF("processOccReset(HUID=0x%08X) called", huid);
        }
        else
        {
            // HUID of the processor, or 0 when no processor was supplied
            uint32_t huid = 0;
            if (NULL != i_proc)
            {
                huid = i_proc->getAttr<TARGETING::ATTR_HUID>();
            }
            // The format previously lacked the '%' conversion, so the HUID
            // argument was never printed.
            TMGT_INF("processOccReset: Invalid OCC target (proc huid=0x%08X)"
                     " resetting OCCs anyway",
                     huid);

            /*@
             * @errortype
             * @reasoncode      HTMGT_RC_INVALID_PARAMETER
             * @moduleid        HTMGT_MOD_PROCESS_OCC_RESET
             * @userdata1[0:7]  Processor HUID
             * @devdesc         No OCC target found for proc Target,
             */
            // NOTE(review): the tag above documents userdata1[0:7], but the
            // HUID is passed as a full 32-bit data word - confirm the
            // intended bit range.
            bldErrLog(errl,
                      HTMGT_MOD_PROCESS_OCC_RESET,
                      HTMGT_RC_INVALID_PARAMETER,
                      huid, 0, 0, 1,
                      ERRORLOG::ERRL_SEV_INFORMATIONAL);

            // Add HB firmware callout
            errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
                                      HWAS::SRCI_PRIORITY_MED);
            ERRORLOG::errlCommit(errl, HTMGT_COMP_ID); // sets errl to NULL
        }

        // failedOccTarget may be NULL here; the reset is attempted regardless
        errl = OccManager::resetOccs(failedOccTarget);
        if(errl)
        {
            ERRORLOG::errlCommit(errl, HTMGT_COMP_ID); // sets errl to NULL
        }
        TMGT_INF("<<processOccReset()");
    } // end processOccReset()
Ejemplo n.º 5
0
    // Notify HTMGT that an OCC has an error to report
    /**
     * @brief Collect pending errors from the OCCs after an OCC reported one.
     *
     * Ignored when the system is in safe mode.  Otherwise the OCC objects
     * are (re)built, the OCC belonging to i_procTarget (if any) is polled
     * with the flush flag set, then all OCCs are polled when there is more
     * than one OCC or no specific OCC was polled.  If any OCC is flagged as
     * needing a reset afterwards, a reset is started.  Every error log
     * produced along the way is committed here; nothing is returned.
     *
     * @param[in] i_procTarget  Processor whose OCC reported the error,
     *                          or NULL to poll all OCCs only
     */
    void processOccError(TARGETING::Target * i_procTarget)
    {
        TMGT_INF(">>processOccError(0x%p)", i_procTarget);

        TARGETING::Target* topLevel = NULL;
        TARGETING::targetService().getTopLevelTarget(topLevel);
        uint8_t inSafeMode = 0;

        // If the system is in safemode then can't talk to OCCs -
        // ignore call to processOccError
        if(topLevel &&
           topLevel->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(inSafeMode) &&
           inSafeMode)
        {
            return;
        }

        errlHndl_t buildErr = OccManager::buildOccs();
        if (NULL != buildErr)
        {
            // OCC build failed...
            TMGT_ERR("processOccError() called, but unable to find OCCs");
            ERRORLOG::errlCommit(buildErr, HTMGT_COMP_ID);
        }
        else
        {
            // Set once the OCC under i_procTarget has been polled
            bool polledSingleOcc = false;

            if (NULL != i_procTarget)
            {
                const uint32_t procHuid =
                    i_procTarget->getAttr<TARGETING::ATTR_HUID>();
                TMGT_INF("processOccError(HUID=0x%08X) called", procHuid);

                TARGETING::TargetHandleList occList;
                getChildChiplets(occList, i_procTarget, TARGETING::TYPE_OCC);
                if (occList.size() > 0)
                {
                    // Poll specified OCC flushing any errors
                    errlHndl_t pollErr =
                        OccManager::sendOccPoll(true, occList[0]);
                    if (NULL != pollErr)
                    {
                        ERRORLOG::errlCommit(pollErr, HTMGT_COMP_ID);
                    }
                    polledSingleOcc = true;
                }
            }

            if ((OccManager::getNumOccs() > 1) || !polledSingleOcc)
            {
                // Send POLL command to all OCCs to flush any other errors
                errlHndl_t pollAllErr = OccManager::sendOccPoll(true);
                if (NULL != pollAllErr)
                {
                    ERRORLOG::errlCommit(pollAllErr, HTMGT_COMP_ID);
                }
            }

            if (OccManager::occNeedsReset())
            {
                TMGT_ERR("processOccError(): OCCs need to be reset");
                // Don't pass failed target as OCC should have already
                // been marked as failed during the poll.
                errlHndl_t resetErr = OccManager::resetOccs(NULL);
                if (NULL != resetErr)
                {
                    ERRORLOG::errlCommit(resetErr, HTMGT_COMP_ID);
                }
            }
        }
        TMGT_INF("<<processOccError()");

    } // end processOccError()
Ejemplo n.º 6
0
    // Send pass-thru command to HTMGT
    /**
     * @brief Process a pass-thru command sent to HTMGT.
     *
     * The first command byte selects the operation:
     *  - PASSTHRU_OCC_STATUS: fills o_rspData/o_rspLength via
     *    OccManager::getOccData()
     *  - PASSTHRU_GENERATE_MFG_PSTATE: command must be exactly 1 byte;
     *    calls genPstateTables(false)
     *  - PASSTHRU_LOAD_PSTATE: command must be exactly 2 bytes; the second
     *    byte is the table type (0 or 1) passed on to
     *    OccManager::loadPstates()
     * Any other command, length or type produces an informational error
     * log whose user data holds the first (up to) 8 command bytes,
     * zero padded, and the command length.
     *
     * @param[in]  i_cmdLength  Number of valid bytes in i_cmdData
     * @param[in]  i_cmdData    Command bytes (may be NULL)
     * @param[out] o_rspLength  Response length; set to 0 on entry
     * @param[out] o_rspData    Response buffer (only written for
     *                          PASSTHRU_OCC_STATUS)
     * @return NULL on success, or when no command data was supplied;
     *         otherwise the error log for the failure
     */
    errlHndl_t passThruCommand(uint16_t   i_cmdLength,
                               uint8_t *  i_cmdData,
                               uint16_t & o_rspLength,
                               uint8_t *  o_rspData)
    {
        errlHndl_t err = NULL;
        htmgtReasonCode failingSrc = HTMGT_RC_NO_ERROR;
        o_rspLength = 0;

        if ((i_cmdLength > 0) && (NULL != i_cmdData))
        {
            // Zero padded copy of the leading command bytes.  Traces and
            // error log user data read from this copy so that a command
            // shorter than 8 bytes is never read past its end.
            const uint16_t FFDC_CMD_BYTES = 8;
            uint8_t cmdFfdc[FFDC_CMD_BYTES] = {0};
            for (uint16_t idx = 0;
                 (idx < i_cmdLength) && (idx < FFDC_CMD_BYTES);
                 ++idx)
            {
                cmdFfdc[idx] = i_cmdData[idx];
            }

            switch (i_cmdData[0])
            {
                case PASSTHRU_OCC_STATUS:
                    TMGT_INF("passThruCommand: OCC Status");
                    OccManager::getOccData(o_rspLength, o_rspData);
                    break;

                case PASSTHRU_GENERATE_MFG_PSTATE:
                    if (i_cmdLength == 1)
                    {
                        // No trace argument: the command is a single byte,
                        // so there is no i_cmdData[1] to report
                        TMGT_INF("passThruCommand: Generate MFG pstate tables");
                        err = genPstateTables(false);
                    }
                    else
                    {
                        TMGT_ERR("passThruCommand: invalid generate pstate "
                                 "command length %d", i_cmdLength);
                        /*@
                         * @errortype
                         * @reasoncode   HTMGT_RC_INVALID_LENGTH
                         * @moduleid     HTMGT_MOD_PASS_THRU
                         * @userdata1    command data[0-7]
                         * @userdata2    command data length
                         * @devdesc      Invalid pass thru command data length
                         */
                        failingSrc = HTMGT_RC_INVALID_LENGTH;
                    }
                    break;

                case PASSTHRU_LOAD_PSTATE:
                    if (i_cmdLength == 2)
                    {
                        const uint8_t pstateType = i_cmdData[1];
                        if ((0 == pstateType) || (1 == pstateType))
                        {
                            TMGT_INF("passThruCommand: Load pstate tables "
                                     "(type: %d)", pstateType);
                            // 0 = Normal Pstate Tables
                            err = OccManager::loadPstates(0 == pstateType);
                        }
                        else
                        {
                            TMGT_ERR("passThruCommand: invalid pstate type "
                                     "specified: %d", pstateType);
                            /*@
                             * @errortype
                             * @reasoncode   HTMGT_RC_INVALID_PARAMETER
                             * @moduleid     HTMGT_MOD_PASS_THRU
                             * @userdata1    command data[0-7]
                             * @userdata2    command data length
                             * @devdesc      Invalid load pstate table type
                             */
                            failingSrc = HTMGT_RC_INVALID_PARAMETER;
                        }
                    }
                    else
                    {
                        TMGT_ERR("passThruCommand: invalid load pstate "
                                 "command length %d", i_cmdLength);
                        failingSrc = HTMGT_RC_INVALID_LENGTH;
                    }
                    break;

                default:
                    TMGT_ERR("passThruCommand: Invalid command 0x%08X "
                             "(%d bytes)", UINT32_GET(cmdFfdc), i_cmdLength);
                    /*@
                     * @errortype
                     * @reasoncode   HTMGT_RC_INVALID_DATA
                     * @moduleid     HTMGT_MOD_PASS_THRU
                     * @userdata1    command data[0-7]
                     * @userdata2    command data length
                     * @devdesc      Invalid pass thru command
                     */
                    failingSrc = HTMGT_RC_INVALID_DATA;
                    break;
            }

            // Only build a validation log when the command handler itself
            // did not already return one
            if ((HTMGT_RC_NO_ERROR != failingSrc) && (NULL == err))
            {
                bldErrLog(err, HTMGT_MOD_PASS_THRU,
                          failingSrc,
                          UINT32_GET(cmdFfdc),
                          UINT32_GET(&cmdFfdc[4]),
                          0, i_cmdLength,
                          ERRORLOG::ERRL_SEV_INFORMATIONAL);
            }
        }

        return err;

    } // end passThruCommand()
Ejemplo n.º 7
0
    // Handle OCC poll response
    /**
     * @brief Process a poll response received from this OCC.
     *
     * Traces the response when its first OCC_POLL_DATA_MIN_SIZE bytes
     * differ from the previously saved response, records that
     * communication is established, requests any error log the OCC is
     * reporting, verifies the master/slave role bit matches iv_role when
     * the OCC is active or in observation, and updates iv_state.  The
     * response is then saved in iv_lastPollResponse.
     *
     * @param[in] i_pollResponse      Poll response data; interpreted as an
     *                                occPollRspStruct_t
     * @param[in] i_pollResponseSize  Size of the response data.  Only used
     *                                for the debug trace; the handler
     *                                reads/copies OCC_POLL_DATA_MIN_SIZE
     *                                bytes regardless (presumably the
     *                                caller validates the size - TODO
     *                                confirm)
     */
    void Occ::pollRspHandler(const uint8_t * i_pollResponse,
                             const uint16_t i_pollResponseSize)
    {
        // NOTE(review): function-local static, so this retry count is
        // shared by every Occ instance rather than tracked per OCC
        static uint32_t L_elog_retry_count = 0;
        TMGT_DBG("OCC Poll Response", i_pollResponse, i_pollResponseSize);

        // View the new and the previously saved response as poll structures
        const occPollRspStruct_t *pollRsp =
            (occPollRspStruct_t *) i_pollResponse;
        const occPollRspStruct_t *lastPollRsp =
            (occPollRspStruct_t *) iv_lastPollResponse;

        // Trace if any data changed
        if ((false == iv_lastPollValid) ||
            (memcmp(pollRsp,
                    lastPollRsp,
                    OCC_POLL_DATA_MIN_SIZE) != 0))
        {
            TMGT_INF("OCC%d Poll change: Status:%04X Occs:%02X Cfg:%02X "
                     "State:%02X Error:%06X/%08X",
                     iv_instance,
                     (pollRsp->status << 8) | pollRsp->extStatus,
                     pollRsp->occsPresent,
                     pollRsp->requestedCfg, pollRsp->state,
                     (pollRsp->errorId<<16) | pollRsp->errorLength,
                     pollRsp->errorAddress);
        }

        // Single-pass loop: 'break' is used to stop processing as soon as
        // the OCC is found to need a reset
        do
        {
            if (false == iv_commEstablished)
            {
                // 1st poll response, so comm has been established for this OCC
                iv_commEstablished = true;
                TMGT_INF("pollRspHandler: FW Level for OCC%d: %.16s",
                         iv_instance, pollRsp->codeLevel);
            }

            // Check for Error Logs
            if (pollRsp->errorId != 0)
            {
                // Process a new error id always; re-request the same id
                // only while the retry count is below 3
                if ((pollRsp->errorId != lastPollRsp->errorId) ||
                    (L_elog_retry_count < 3))

                {
                    if (pollRsp->errorId == lastPollRsp->errorId)
                    {
                        // Only retry same errorId a few times...
                        L_elog_retry_count++;
                        TMGT_ERR("pollRspHandler: Requesting elog 0x%02X"
                                 " (retry %d)",
                                 pollRsp->errorId, L_elog_retry_count);
                    }
                    else
                    {
                        // Different error id: start counting retries again
                        L_elog_retry_count = 0;
                    }

                    // Handle a new error log from the OCC
                    occProcessElog(this,
                                   pollRsp->errorId,
                                   pollRsp->errorAddress,
                                   pollRsp->errorLength);
                    if (iv_needsReset)
                    {
                        // Update state if changed...
                        // (since dropping out of poll rsp handler)
                        if (iv_state != pollRsp->state)
                        {
                            iv_state = (occStateId)pollRsp->state;
                            TMGT_INF("pollRspHandler: updating OCC%d state"
                                     " to %s",
                                     iv_instance, state_string(iv_state));
                        }
                        break;
                    }
                }
            }

            if ((OCC_STATE_ACTIVE == pollRsp->state) ||
                (OCC_STATE_OBSERVATION == pollRsp->state))
            {
                // Check role status: the master bit in the status byte must
                // agree with the role HTMGT has recorded for this OCC
                if (((OCC_ROLE_SLAVE == iv_role) &&
                     ((pollRsp->status & OCC_STATUS_MASTER) != 0)) ||
                    ((OCC_ROLE_MASTER == iv_role) &&
                     ((pollRsp->status & OCC_STATUS_MASTER) == 0)))
                {
                    TMGT_ERR("pollRspHandler: OCC%d Status role mismatch"
                             " (role:0x%02X, status:0x%02X 0x%02X)",
                             iv_instance, iv_role, pollRsp->status,
                             pollRsp->extStatus);
                    iv_needsReset = true;
                    // TODO RTC 109224
                    //iv_resetReason = OCC_RESET_REASON_ERROR;
                    break;
                }
            }

            //iv_requestedFormat = (occCfgDataFormat)pollRsp->requestedCfg;
            // The requested config format is only traced here
            if (pollRsp->requestedCfg != 0x00)
            {
                TMGT_INF("pollRspHandler: OCC%d is requesting cfg format"
                         " 0x%02X", iv_instance,
                         pollRsp->requestedCfg);
            }

            // Check for state change
            if (iv_state != pollRsp->state)
            {
                iv_state = (occStateId)pollRsp->state;
                TMGT_INF("pollRspHandler: updating OCC%d state to %s",
                         iv_instance, state_string(iv_state));
            }

            // Copy rspData to lastPollResponse
            memcpy(iv_lastPollResponse, pollRsp, OCC_POLL_DATA_MIN_SIZE);
            iv_lastPollValid = true;
        }
        while(0);

        // NOTE: When breaking out of the above while loop, the copy to
        //       lastPollResponse at the end of the loop body is skipped
        //       (should only break when reset required); the block below
        //       performs the save for that case.

        if (true == iv_needsReset)
        {
            // Save the poll response (first OCC_POLL_DATA_MIN_SIZE bytes)
            // and the reported state so they reflect the failing OCC
            memcpy(iv_lastPollResponse, pollRsp, OCC_POLL_DATA_MIN_SIZE);
            iv_lastPollValid = true;
            iv_state = (occStateId)pollRsp->state;
        }

    } // end Occ::pollRspHandler()