// Move the OCCs to active state or log unrecoverable error and // stay in safe mode void processOccStartStatus(const bool i_startCompleted, TARGETING::Target * i_failedOccTarget) { TMGT_INF(">>processOccStartStatus(%d,0x%p)", i_startCompleted, i_failedOccTarget); errlHndl_t l_err = NULL; uint32_t l_huid = 0; if (i_failedOccTarget) { l_huid = TARGETING::get_huid(i_failedOccTarget); } TMGT_INF("processOccStartStatus(Start Success=%c, failedOcc=0x%08X)", i_startCompleted?'y':'n', l_huid); if (i_startCompleted) { // Query functional OCCs l_err = OccManager::buildOccs(); if (NULL == l_err) { if (NULL != OccManager::getMasterOcc()) { do { #ifndef __HOSTBOOT_RUNTIME // Build pstate tables (once per IPL) l_err = genPstateTables(); if(l_err) { break; } // Calc memory throttles (once per IPL) calcMemThrottles(); #endif // Make sure OCCs are ready for communication OccManager::waitForOccCheckpoint(); #ifdef __HOSTBOOT_RUNTIME // TODO RTC 124738 Final solution TBD // Perhapse POLL scom 0x6a214 until bit 31 is set? nanosleep(1,0); #endif // Send poll to establish comm TMGT_INF("Send initial poll to all OCCs to" " establish comm"); l_err = OccManager::sendOccPoll(); if (l_err) { // Continue even if failed (poll will be retried) ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); } // Send ALL config data sendOccConfigData(); // Set the User PCAP l_err = sendOccUserPowerCap(); if (l_err) { break; } // Wait for all OCCs to go to the target state l_err = waitForOccState(); if ( l_err ) { break; } // Set active sensors for all OCCs, // so BMC can start communication with OCCs l_err = setOccActiveSensors(true); if (l_err) { // Continue even if failed to update sensor ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); } } while(0); } else { TMGT_ERR("Unable to find any Master capable OCCs"); /*@ * @errortype * @reasoncode HTMGT_RC_OCC_MASTER_NOT_FOUND * @moduleid HTMGT_MOD_LOAD_START_STATUS * @userdata1[0:7] number of OCCs * @devdesc No OCC master was found */ bldErrLog(l_err, HTMGT_MOD_LOAD_START_STATUS, HTMGT_RC_OCC_MASTER_NOT_FOUND, OccManager::getNumOccs(), 0, 0, 0, ERRORLOG::ERRL_SEV_INFORMATIONAL); } } else { // Failed to find functional OCCs, no need to try again // Set original error log as unrecoverable and commit l_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE); ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); } } else { TMGT_ERR("All OCCs were not loaded/started successfully"); /*@ * @errortype * @reasoncode HTMGT_RC_OCC_START_FAIL * @moduleid HTMGT_MOD_LOAD_START_STATUS * @userdata1 Failing OCC HUID * @devdesc OCCs were not loaded/started successfully */ bldErrLog(l_err, HTMGT_MOD_LOAD_START_STATUS, HTMGT_RC_OCC_START_FAIL, l_huid, 0, 0, 0, ERRORLOG::ERRL_SEV_INFORMATIONAL); } if (NULL != l_err) { TMGT_ERR("OCCs not all active. Attempting OCC Reset"); TMGT_CONSOLE("OCCs are not active (rc=0x%04X). " "Attempting OCC Reset", l_err->reasonCode()); TMGT_INF("Calling resetOccs"); errlHndl_t err2 = OccManager::resetOccs(NULL); if(err2) { TMGT_ERR("OccManager::resetOccs failed with 0x%04X", err2->reasonCode()); // Set original error log as unrecoverable and commit l_err->setSev(ERRORLOG::ERRL_SEV_UNRECOVERABLE); ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); // Commit occReset error ERRORLOG::errlCommit(err2, HTMGT_COMP_ID); } else { // retry worked - commit original error as informational l_err->setSev(ERRORLOG::ERRL_SEV_INFORMATIONAL); ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); } } TMGT_INF("<<processOccStartStatus()"); } // end processOccStartStatus()
// Set the OCC state errlHndl_t enableOccActuation(bool i_occActivation) { TMGT_INF(">>enableOccActuation(%c)", i_occActivation?'Y':'N'); errlHndl_t l_err = NULL; TARGETING::Target* sys = NULL; // If the system is already in safemode then can't talk to OCCs TARGETING::targetService().getTopLevelTarget(sys); uint8_t safeMode = 0; if(sys) { sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode); } if (0 == safeMode) { occStateId targetState = OCC_STATE_ACTIVE; if (false == i_occActivation) { targetState = OCC_STATE_OBSERVATION; } // Set state for all OCCs l_err = OccManager::setOccState(targetState); if (NULL == l_err) { TMGT_INF("enableOccActuation: OCC states updated to 0x%02X", targetState); } if (OccManager::occNeedsReset()) { if (l_err) { // Commit setOccState elog since OCCs will be reset // and recovery attempted. ERRORLOG::errlCommit(l_err, HTMGT_COMP_ID); } TMGT_ERR("enableOccActuation(): OCCs need to be reset"); // Don't pass failed target as OCC should have already // been marked as failed during the poll. l_err = OccManager::resetOccs(NULL); // NOTE: If the system exceeded its reset count and ended up // in safe mode an error may not be returned here (if a // failure happened after the first reset attempt). // This is because the resets are recursive: // HTMGT calls back into HBRT to initiate the reset, then // HBRT calls into HTMGT when reset completed // To detected this condition we need to check for safe mode // after the recovery attempts and return error if in safe. if(sys) { sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode); } } } if ((NULL == l_err) && safeMode) { // Create an elog so the user knows the cmd failed. TMGT_ERR("enableOccActuation(): System is in safe mode"); /*@ * @errortype * @reasoncode HTMGT_RC_OCC_CRIT_FAILURE * @moduleid HTMGT_MOD_ENABLE_OCC_ACTUATION * @userdata1 OCC activate [1==true][0==false] * @devdesc Operation not allowed, system is in safe mode */ bldErrLog(l_err, HTMGT_MOD_ENABLE_OCC_ACTUATION, HTMGT_RC_OCC_CRIT_FAILURE, 0, i_occActivation, 0, safeMode, ERRORLOG::ERRL_SEV_UNRECOVERABLE); } TMGT_INF("<<enableOccActuation() returning 0x%04X", (l_err==NULL) ? 0 : l_err->reasonCode()); return l_err; } // end enableOccActuation()
// Notify HTMGT that an OCC has failed and needs to be reset void processOccReset(TARGETING::Target * i_proc) { TMGT_INF(">>processOccReset(0x%p)", i_proc); errlHndl_t errl = NULL; TARGETING::Target * failedOccTarget = NULL; TARGETING::Target* sys = NULL; TARGETING::targetService().getTopLevelTarget(sys); uint8_t safeMode = 0; // If the system is in safemode then ignore request to reset OCCs if(sys && sys->tryGetAttr<TARGETING::ATTR_HTMGT_SAFEMODE>(safeMode) && safeMode) { return; } // Get functional OCC (one per proc) TARGETING::TargetHandleList pOccs; getChildChiplets(pOccs, i_proc, TARGETING::TYPE_OCC); if (pOccs.size() > 0) { failedOccTarget = pOccs[0]; } if(NULL != failedOccTarget) { uint32_t huid = failedOccTarget->getAttr<TARGETING::ATTR_HUID>(); TMGT_INF("processOccReset(HUID=0x%08X) called", huid); } else { uint32_t huid = i_proc->getAttr<TARGETING::ATTR_HUID>(); TMGT_INF("processOccReset: Invalid OCC target (proc huid=0x08X)" "resetting OCCs anyway", huid); /*@ * @errortype * @reasoncode HTMGT_RC_INVALID_PARAMETER * @moduleid HTMGT_MOD_PROCESS_OCC_RESET * @userdata1[0:7] Processor HUID * @devdesc No OCC target found for proc Target, */ bldErrLog(errl, HTMGT_MOD_PROCESS_OCC_RESET, HTMGT_RC_INVALID_PARAMETER, huid, 0, 0, 1, ERRORLOG::ERRL_SEV_INFORMATIONAL); // Add HB firmware callout errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE, HWAS::SRCI_PRIORITY_MED); ERRORLOG::errlCommit(errl, HTMGT_COMP_ID); // sets errl to NULL } errl = OccManager::resetOccs(failedOccTarget); if(errl) { ERRORLOG::errlCommit(errl, HTMGT_COMP_ID); // sets errl to NULL } TMGT_INF("<<processOccReset()"); } // end processOccReset()
// Process elog entry from OCC poll response void occProcessElog(Occ * i_occ, const uint8_t i_id, const uint32_t i_address, const uint16_t i_length) { errlHndl_t l_errlHndl = NULL; // Read data from SRAM (length must be multiple of 8 bytes) const uint16_t l_length = (i_length + 8) & 0xFFF8; uint8_t l_sram_data[8 + l_length]; ecmdDataBufferBase l_buffer(l_length*8); // convert to bits // HBOCC is only defined for HTMGT #ifdef CONFIG_HTMGT l_errlHndl = HBOCC::readSRAM(i_occ->getTarget(), i_address, l_buffer); #endif if (NULL == l_errlHndl) { const uint32_t l_flatSize = l_buffer.flattenSize(); l_buffer.flatten(l_sram_data, l_flatSize); // Skip 8 byte ecmd header const occErrlEntry_t *l_occElog=(occErrlEntry_t *)&l_sram_data[8]; TMGT_BIN("OCC ELOG", l_occElog, 64); const uint32_t l_occSrc = OCCC_COMP_ID | l_occElog->reasonCode; ERRORLOG::errlSeverity_t l_errlSeverity = ERRORLOG::ERRL_SEV_INFORMATIONAL; #if 0 // TODO: RTC 109224 - determine correct severity/actions // Process Severity const uint8_t l_occSeverity = l_occElog->severity; const uint8_t l_occActions = l_occElog->actions; if (l_occSeverity < OCC_SEV_ACTION_XLATE_SIZE) { l_errlSeverity = occSeverityErrorActionXlate[l_occSeverity].occErrlSeverity; } else { TMGT_ERR("occProcessElog: Severity translate failure" " (severity = 0x%02X)", l_occElog->severity); } // Process elog Actions bool l_occReset = false; elogProcessActions(l_occActions, l_occReset, l_errlSeverity); if (l_occReset == true) { iv_needsReset = true; UPDATE_SAFE_MODE_REASON(l_occSrc, iv_huid, true); } #endif // Create OCC error log // NOTE: word 4 (used by extended reason code) to save off OCC // sub component value which is needed to correctly parse // srcs which have similar uniqueness // NOTE: SRC tags are NOT required here as these logs will get // parsed with the OCC src tags const occErrlUsrDtls_t *l_usrDtls_ptr = (occErrlUsrDtls_t *) ((uint8_t*)l_occElog+sizeof(occErrlEntry_t)+ (l_occElog->numCallouts * sizeof(occErrlCallout_t)) ); bldErrLog(l_errlHndl, (htmgtModuleId)(l_usrDtls_ptr->modId & 0x00FF), (htmgtReasonCode)l_occSrc, // occ reason code l_usrDtls_ptr->userData1, l_usrDtls_ptr->userData2, l_usrDtls_ptr->userData3, ((l_usrDtls_ptr->modId & 0xFF00) << 16 ) | l_occElog->userData4, // extended reason code l_errlSeverity); #if 0 // TODO: RTC 109224 // Add callout information bool l_bad_fru_data = false; uint8_t l_callout_num = 0; if (! ((ERRL_SEV_INFORMATIONAL == l_errlSeverity) && (TMGT_ERRL_ACTIONS_MANUFACTURING_ERROR & l_occActions)) ) { // Only add callouts if this is MFG error and system not in // MFG (in MFG severity would not be Info) uint8_t l_index = 0; uint8_t l_count = 1; const uint8_t l_max_callout = l_occElog->numCallouts; // The beginning address of callout data l_index = sizeof(occErrlEntry_t); do { occErrlCallout_t *l_callout_ptr = NULL; l_callout_ptr = (occErrlCallout_t *) ((uint8_t*)l_occElog+l_index); if (l_callout_ptr->type != 0) { srciPriority l_priority; bool l_success = true; l_success = elogXlateSrciPriority(l_callout_ptr->priority, l_priority); if (l_success == true) { l_success = elogAddCallout(l_errlHndl, l_errlSeverity, l_priority, *l_callout_ptr, l_callout_num); if (l_success == false) { l_bad_fru_data = true; } } else { l_bad_fru_data = true; TMGT_ERR("occProcessElog: Priority translate" " failure (priority = 0x%02X)", l_callout_ptr->priority); } l_index += sizeof(occErrlCallout_t); } // if (l_type != 0) else { // make sure all the remaining callout data are zeros, // otherwise mark bad fru data uint8_t *l_ptr = (uint8_t*)l_occElog+l_index; uint8_t l_len = (l_max_callout-l_count+1)* sizeof(occErrlCallout_t); while (l_len != 0) { if (*l_ptr != 0x00) { TMGT_ERR("occProcessElog: The remaining" " callout data should be all zeros"); l_bad_fru_data = true; break; } l_len--; l_ptr++; } break; } l_count++; } while (l_count <= l_max_callout); } else { TMGT_ERR("MFG error found outside MFG; callouts will not be" " added to log (OCC severity=0x%02X, actions=0x%02X)", l_occSeverity, l_occActions); const uint8_t l_callout_length = l_occElog->numCallouts * 12; const char *l_callout_ptr = (char *)((uint8_t*)l_occElog+ sizeof(occErrlEntry_t)); // Add raw callout data from the OCC l_errlHndl->addUsrDtls(l_callout_ptr, l_callout_length, TMGT_COMP_ID, TMGT_VERSION, TMGT_ERROR_DATA_TYPE); } // Any bad fru data found ? errlHndl_t l_errlHndl2 = NULL; if (l_bad_fru_data == true) { /*@ * @errortype * @refcode LIC_REFCODE * @subsys EPUB_FIRMWARE_SP * @reasoncode HTMGT_RC_OCC_ERROR_LOG * @moduleid HTMGT_MOD_BAD_FRU_CALLOUTS * @userdata1 OCC elog id * @userdata2 Number of good callouts * @devdesc Bad FRU data received in OCC error log */ bldErrLog(l_errlHndl2, HTMGT_MOD_BAD_FRU_CALLOUTS, HTMGT_RC_OCC_ERROR_LOG, i_id, l_callout_num, 0, 0, ERRL_SEV_INFORMATIONAL); ERRORLOG::errlCommit(l_errlHndl2, HTMGT_COMP_ID); } // Check callout number and severity if ((l_callout_num == 0) && (l_errlSeverity != ERRL_SEV_INFORMATIONAL)) { TMGT_ERR("occProcessElog: No FRU callouts found for OCC%d" " elog_id:0x%02X, severity:0x%0X", iv_instance, i_id, l_errlSeverity); /*@ * @errortype * @refcode LIC_REFCODE * @subsys EPUB_FIRMWARE_SP * @reasoncode HTMGT_RC_OCC_ERROR_LOG * @moduleid HTMGT_MOD_MISMATCHING_SEVERITY * @userdata1 OCC elog id * @userdata2 OCC severity * @userdata3 * @userdata4 * @devdesc No FRU callouts found for non-info OCC Error Log */ bldErrLog(l_errlHndl2, HTMGT_MOD_MISMATCHING_SEVERITY, HTMGT_RC_OCC_ERROR_LOG, i_id, l_errlSeverity, 0, 0, ERRL_SEV_INFORMATIONAL); ERRORLOG::errlCommit(l_errlHndl2, HTMGT_COMP_ID); } #endif // Add full OCC error log data as a User Details section l_errlHndl->addFFDC(OCCC_COMP_ID, l_occElog, i_length, 1, // version 0); // subsection #if 0 // TODO: RTC 109224 // Add additional data addTmgtElogData(l_errlHndl); addThermalElogData(l_errlHndl); #endif // Commit Error (or terminate if required) ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); // Clear elog const uint8_t l_cmdData[1] = {i_id}; OccCmd l_cmd(i_occ, OCC_CMD_CLEAR_ERROR_LOG, sizeof(l_cmdData), l_cmdData); l_errlHndl = l_cmd.sendOccCmd(); if (l_errlHndl != NULL) { TMGT_ERR("occProcessElog: Failed to clear elog id %d to" " OCC%d (rc=0x%04X)", i_id, i_occ, l_errlHndl->reasonCode()); ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); } } else { TMGT_ERR("occProcessElog: Unable to read elog %d from SRAM" " address (0x%08X) length (0x%04X), rc=0x%04X", i_id, i_address, i_length, l_errlHndl->reasonCode()); ERRORLOG::errlCommit(l_errlHndl, HTMGT_COMP_ID); } } // end Occ::occProcessElog()
// Send pass-thru command to HTMGT errlHndl_t passThruCommand(uint16_t i_cmdLength, uint8_t * i_cmdData, uint16_t & o_rspLength, uint8_t * o_rspData) { errlHndl_t err = NULL; htmgtReasonCode failingSrc = HTMGT_RC_NO_ERROR; o_rspLength = 0; if ((i_cmdLength > 0) && (NULL != i_cmdData)) { switch (i_cmdData[0]) { case PASSTHRU_OCC_STATUS: TMGT_INF("passThruCommand: OCC Status"); OccManager::getOccData(o_rspLength, o_rspData); break; case PASSTHRU_GENERATE_MFG_PSTATE: if (i_cmdLength == 1) { TMGT_INF("passThruCommand: Generate MFG pstate tables", i_cmdData[1]); err = genPstateTables(false); } else { TMGT_ERR("passThruCommand: invalid generate pstate " "command length %d", i_cmdLength); /*@ * @errortype * @reasoncode HTMGT_RC_INVALID_LENGTH * @moduleid HTMGT_MOD_PASS_THRU * @userdata1 command data[0-7] * @userdata2 command data length * @devdesc Invalid pass thru command data length */ failingSrc = HTMGT_RC_INVALID_LENGTH; } break; case PASSTHRU_LOAD_PSTATE: if (i_cmdLength == 2) { const uint8_t pstateType = i_cmdData[1]; if ((0 == pstateType) || (1 == pstateType)) { TMGT_INF("passThruCommand: Load pstate tables " "(type: %d)", pstateType); // 0 = Normal Pstate Tables err = OccManager::loadPstates(0 == pstateType); } else { TMGT_ERR("passThruCommand: invalid pstate type " "specified: %d", pstateType); /*@ * @errortype * @reasoncode HTMGT_RC_INVALID_PARAMETER * @moduleid HTMGT_MOD_PASS_THRU * @userdata1 command data[0-7] * @userdata2 command data length * @devdesc Invalid load pstate table type */ failingSrc = HTMGT_RC_INVALID_PARAMETER; } } else { TMGT_ERR("passThruCommand: invalid load pstate " "command length %d", i_cmdLength); failingSrc = HTMGT_RC_INVALID_LENGTH; } break; default: TMGT_ERR("passThruCommand: Invalid command 0x%08X " "(%d bytes)", UINT32_GET(i_cmdData), i_cmdLength); /*@ * @errortype * @reasoncode HTMGT_RC_INVALID_DATA * @moduleid HTMGT_MOD_PASS_THRU * @userdata1 command data[0-7] * @userdata2 command data length * @devdesc Invalid pass thru command */ failingSrc = HTMGT_RC_INVALID_DATA; break; } if ((HTMGT_RC_NO_ERROR != failingSrc) && (NULL == err)) { bldErrLog(err, HTMGT_MOD_PASS_THRU, failingSrc, UINT32_GET(i_cmdData), UINT32_GET(&i_cmdData[4]), 0, i_cmdLength, ERRORLOG::ERRL_SEV_INFORMATIONAL); } } return err; } // end passThruCommand()
// Send a poll command to all OCCs errlHndl_t sendOccPoll(const bool i_flushAllErrors) { errlHndl_t l_err = NULL; uint8_t * l_poll_rsp = NULL; // Loop through all functional OCCs std::vector<Occ*> occList = occMgr::instance().getOccArray(); for (std::vector<Occ*>::iterator itr = occList.begin(); (itr < occList.end()) && (NULL == l_err); ++itr) { Occ * occ = (*itr); const uint8_t occInstance = occ->getInstance(); bool continuePolling = false; do { // create 1 byte buffer for poll command data const uint8_t l_cmdData[1] = { 0x10 /*version*/ }; OccCmd cmd(occ, OCC_CMD_POLL, sizeof(l_cmdData), l_cmdData); l_err = cmd.sendOccCmd(); if (l_err != NULL) { // Poll failed TMGT_ERR("sendOccPoll: OCC%d poll failed with rc=0x%04X", occInstance, l_err->reasonCode()); } else { // Poll succeeded, check response uint32_t l_poll_rsp_size = cmd.getResponseData(l_poll_rsp); if (l_poll_rsp_size >= OCC_POLL_DATA_MIN_SIZE) { if (i_flushAllErrors) { const occPollRspStruct_t *currentPollRsp = (occPollRspStruct_t *) l_poll_rsp; if (currentPollRsp->errorId != 0) { // An error was returned, keep polling OCC continuePolling = true; } else { continuePolling = false; } } occ->pollRspHandler(l_poll_rsp, l_poll_rsp_size); } else { TMGT_ERR("sendOccPoll: OCC%d poll command response " "failed with invalid data length %d", occInstance, l_poll_rsp_size); /*@ * @errortype * @reasoncode HTMGT_RC_INVALID_LENGTH * @moduleid HTMGT_MOD_OCC_POLL * @userdata1 OCC instance * @devdesc Invalid POLL response length */ bldErrLog(l_err, HTMGT_MOD_OCC_POLL, HTMGT_RC_INVALID_LENGTH, occInstance, 0, 0, 0, ERRORLOG::ERRL_SEV_INFORMATIONAL); } } } while (continuePolling); } // for each OCC return l_err; } // end sendOccPoll()