int32_t handleLaneRepairEvent( ExtensibleChip * i_chip, TYPE i_busType, uint32_t i_busPos, STEP_CODE_DATA_STRUCT & i_sc, bool i_spareDeployed ) { #define PRDF_FUNC "[LaneRepair::handleLaneRepairEvent] " int32_t l_rc = SUCCESS; TargetHandle_t rxBusTgt = NULL; TargetHandle_t txBusTgt = NULL; bool thrExceeded = true; std::vector<uint8_t> rx_lanes; std::vector<uint8_t> rx_vpdLanes; std::vector<uint8_t> tx_vpdLanes; BitStringBuffer l_vpdLaneMap0to63(64); BitStringBuffer l_vpdLaneMap64to127(64); BitStringBuffer l_newLaneMap0to63(64); BitStringBuffer l_newLaneMap64to127(64); do { #ifdef __HOSTBOOT_MODULE if ( CHECK_STOP == i_sc.service_data->GetAttentionType() ) { // This would only happen on OpenPOWER machines when we are doing // the post IPL analysis. In this case, we do not have the FFDC to // query the IO registers so simply set service call and skip // everything else. i_sc.service_data->SetServiceCall(); return SUCCESS; } #endif // Get the RX and TX targets. l_rc = CalloutUtil::getBusEndpoints( i_chip, rxBusTgt, txBusTgt, i_busType, i_busPos ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC "getBusEndpoints() failed" ); break; } // Call io_read_erepair l_rc = readErepair(rxBusTgt, rx_lanes); if (SUCCESS != l_rc) { PRDF_ERR( PRDF_FUNC "readErepair() failed: rxBusTgt=0x%08x", getHuid(rxBusTgt) ); break; } // Add newly failed lanes to capture data for (std::vector<uint8_t>::iterator lane = rx_lanes.begin(); lane != rx_lanes.end(); ++lane) { PRDF_INF( PRDF_FUNC "New failed lane on RX HUID 0x%08x: %d", getHuid(rxBusTgt), *lane); if (*lane < 64) l_newLaneMap0to63.Set(*lane); else if (*lane < 127) l_newLaneMap64to127.Set(*lane - 64); else { PRDF_ERR( PRDF_FUNC "Invalid lane number %d: rxBusTgt=0x%08x", *lane, getHuid(rxBusTgt) ); l_rc = FAIL; break; } } if ( SUCCESS != l_rc ) break; // Add failed lane capture data to errorlog i_sc.service_data->GetCaptureData().Add(i_chip->GetChipHandle(), ( Util::hashString("ALL_FAILED_LANES_0TO63") ^ i_chip->getSignatureOffset() ), 
l_newLaneMap0to63); i_sc.service_data->GetCaptureData().Add(i_chip->GetChipHandle(), ( Util::hashString("ALL_FAILED_LANES_64TO127") ^ i_chip->getSignatureOffset() ), l_newLaneMap64to127); if (!mfgMode()) // Don't read/write VPD in mfg mode { // Read Failed Lanes from VPD l_rc = getVpdFailedLanes(rxBusTgt, rx_vpdLanes, tx_vpdLanes); if (SUCCESS != l_rc) { PRDF_ERR( PRDF_FUNC "getVpdFailedLanes() failed: " "rxBusTgt=0x%08x", getHuid(rxBusTgt) ); break; } // Add VPD lanes to capture data for (std::vector<uint8_t>::iterator lane = rx_vpdLanes.begin(); lane != rx_vpdLanes.end(); ++lane) { if (*lane < 64) l_vpdLaneMap0to63.Set(*lane); else if (*lane < 127) l_vpdLaneMap64to127.Set(*lane - 64); else { PRDF_ERR( PRDF_FUNC "Invalid VPD lane number %d: " "rxBusTgt=0x%08x", *lane, getHuid(rxBusTgt) ); l_rc = FAIL; break; } } if ( SUCCESS != l_rc ) break; // Add failed lane capture data to errorlog i_sc.service_data->GetCaptureData().Add(i_chip->GetChipHandle(), ( Util::hashString("VPD_FAILED_LANES_0TO63") ^ i_chip->getSignatureOffset() ), l_vpdLaneMap0to63); i_sc.service_data->GetCaptureData().Add(i_chip->GetChipHandle(), ( Util::hashString("VPD_FAILED_LANES_64TO127") ^ i_chip->getSignatureOffset() ), l_vpdLaneMap64to127); if (i_spareDeployed) { // Call Erepair to update VPD l_rc = setVpdFailedLanes(rxBusTgt, txBusTgt, rx_lanes, thrExceeded); if (SUCCESS != l_rc) { PRDF_ERR( PRDF_FUNC "setVpdFailedLanes() failed: " "rxBusTgt=0x%08x txBusTgt=0x%08x", getHuid(rxBusTgt), getHuid(txBusTgt) ); break; } if( thrExceeded ) { i_sc.service_data->SetErrorSig( PRDFSIG_ERepair_FWThrExceeded ); } } } if (i_spareDeployed && !thrExceeded) { // Update lists of lanes from VPD rx_vpdLanes.clear(); tx_vpdLanes.clear(); l_rc = getVpdFailedLanes(rxBusTgt, rx_vpdLanes, tx_vpdLanes); if (SUCCESS != l_rc) { PRDF_ERR( PRDF_FUNC "getVpdFailedLanes() before power down " "failed: rxBusTgt=0x%08x", getHuid(rxBusTgt) ); break; } // Power down all lanes that have been saved in VPD l_rc = 
powerDownLanes(rxBusTgt, rx_vpdLanes, tx_vpdLanes); if (SUCCESS != l_rc) { PRDF_ERR( PRDF_FUNC "powerDownLanes() failed: rxBusTgt=0x%08x", getHuid(rxBusTgt) ); break; } } else { // Make predictive i_sc.service_data->SetServiceCall(); } } while (0); // Clear FIRs if (rxBusTgt) { l_rc |= erepairFirIsolation(rxBusTgt); l_rc |= clearIOFirs(rxBusTgt); } if ( i_spareDeployed ) { l_rc |= cleanupSecondaryFirBits( i_chip, i_busType, i_busPos ); } // This return code gets returned by the plugin code back to the rule code. // So, we do not want to give a return code that the rule code does not // understand. So far, there is no need return a special code, so always // return SUCCESS. if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC "i_chip: 0x%08x i_busType:%d i_busPos:%d", i_chip->GetId(), i_busType, i_busPos ); i_sc.service_data->SetErrorSig( PRDFSIG_ERepair_ERROR ); CalloutUtil::defaultError( i_sc ); } return SUCCESS; #undef PRDF_FUNC }
/**
 * @brief  Runs one PRD analysis pass for the given global attention.
 *
 * Initializes PRD if needed, analyzes the attention through the global system
 * object and has the service generator build the resulting error log.
 *
 * @param  i_attentionType Global attention type. An invalid value is traced
 *                         and replaced with RECOVERABLE.
 * @param  i_attnList      Attention list used to refresh the system debug
 *                         object.
 * @return The error log handle released from g_prd_errlHndl after
 *         GenerateSrcPfa() (may be NULL).
 */
errlHndl_t main( ATTENTION_VALUE_TYPE i_attentionType,
                 const AttnList & i_attnList )
{
    PRDF_ENTER( "PRDF::main() Global attnType=%04X", i_attentionType );

    // Scope lock: released automatically when this function returns.
    PRDF_SYSTEM_SCOPELOCK;

    g_prd_errlHndl = NULL;

    uint32_t l_status = SUCCESS;

    // Drop every chip that the previous analysis left on the chip stack.
    ServiceDataCollector::clearChipStack();

    if ( !g_initialized && (systemPtr == NULL) )
    {
        g_prd_errlHndl = noLock_initialize();
        if ( g_prd_errlHndl != NULL )
        {
            l_status = PRD_NOT_INITIALIZED;
        }
    }

    ServiceDataCollector l_serviceData;
    STEP_CODE_DATA_STRUCT l_stepData;
    l_stepData.service_data = &l_serviceData;

    // Refresh the debug object with the latest attention data.
    SYSTEM_DEBUG_CLASS l_sysDebug;
    l_sysDebug.Reinitialize(i_attnList);

    ////////////////////////////////////////////////////////////////////////////
    // Normalize global attn type (ie 11,12,13,....) to (CHECKSTOP, RECOVERED,
    // SPECIAL..)
    ////////////////////////////////////////////////////////////////////////////
    const bool l_badAttnType =
        ( i_attentionType == INVALID_ATTENTION_TYPE ) ||
        ( i_attentionType >= END_ATTENTION_TYPE );
    if ( l_badAttnType )
    {
        l_status = PRD_INVALID_ATTENTION_TYPE;
        PRDF_ERR( "PrdMain: Invalid attention type! Global:%x",
                  i_attentionType );
        // Falling back to RECOVERABLE prevents RAS service problems.
        i_attentionType = RECOVERABLE;
    }

    // Bind to the service generator used for this analysis.
    ServiceGeneratorClass & l_svcGen =
        ServiceGeneratorClass::ThisServiceGenerator();

    // The initial SDC error log must exist before GenerateSrcPfa() is called.
    l_svcGen.createInitialErrl( i_attentionType );

    // Analysis is skipped when PRD is not usable or an error was already seen.
    const bool l_skipAnalysis = !g_initialized
                             || ( l_status != SUCCESS )
                             || ( systemPtr == NULL );

    if ( !l_skipAnalysis )
    {
        // Flush the cache so that SCR reads go to hardware.
        RegDataCache::getCachedRegisters().flush();

        l_serviceData.SetAttentionType(i_attentionType);

        // Record the time of day of the error.
        l_svcGen.SetErrorTod( i_attentionType, l_serviceData );

        if ( l_svcGen.QueryLoggingBufferFull() )
        {
            l_serviceData.SetFlooding();
        }

        const int32_t l_analyzeRc =
            systemPtr->Analyze( l_stepData, i_attentionType );

        // Flush again, this time to give the memory back.
        RegDataCache::getCachedRegisters().flush();

        // The wrapper register objects only existed for the plugin code, so
        // delete all of them now.
        ScanFacility::Access().ResetPluginRegister();

        SystemSpecific::postAnalysisWorkarounds(l_stepData);

        if ( l_analyzeRc != SUCCESS && g_prd_errlHndl == NULL )
        {
            // Bad RC without an error log: fill out the SDC so the service
            // generator creates one.
            (l_serviceData.GetErrorSignature())->setErrCode(
                                        static_cast<uint16_t>(l_analyzeRc) );
            l_serviceData.SetCallout(SP_CODE);
            l_serviceData.SetServiceCall();
            // Only gard on a good return code.
            l_serviceData.Gard(GardAction::NoGard);
        }
    }
    else
    {
        if ( l_status == SUCCESS )
        {
            l_status = PRD_NOT_INITIALIZED;
        }
        PRDF_ERR("PrdMain: PRD failed. RC=%x", l_status );

        // No analysis will be done, so fill out the service data here.
        (l_serviceData.GetErrorSignature())->setSigId(l_status);
        l_serviceData.SetCallout(SP_CODE);
        // Sets AT_THRESHOLD, DEGRADED, SERVICE_CALL
        l_serviceData.SetThresholdMaskId(0);
    }

    if ( g_prd_errlHndl != NULL )
    {
        PRDF_INF("PRDTRACE: PrdMain: g_prd_errlHndl != NULL");
        PRDF_ADD_PROCEDURE_CALLOUT( g_prd_errlHndl, SRCI_PRIORITY_MED,
                                    EPUB_PRC_SP_CODE );

        // Resetting the handle forces any previous errls to be committed.
        g_prd_errlHndl = NULL;

        // pw 597903 -- Don't GARD if we got a global error.
        l_serviceData.Gard(GardAction::NoGard);
    }

    g_prd_errlHndl = l_svcGen.GenerateSrcPfa( i_attentionType,
                                              l_serviceData );

    // At threshold with no error log: sleep 20msec so the attention lines can
    // settle.
    if ( (g_prd_errlHndl == NULL) && l_serviceData.IsAtThreshold() )
    {
        PlatServices::milliSleep( 0, 20 );
    }

    RasServices::SetTerminateOnCheckstop(true);

    PRDF_EXIT( "PRDF::main()" );

    return g_prd_errlHndl.release();
}
errlHndl_t main( ATTENTION_VALUE_TYPE i_attentionType, const AttnList & i_attnList ) { PRDF_ENTER( "PRDF::main() Global attnType=%04X", i_attentionType ); // These have to be outside of system scope lock errlHndl_t retErrl = NULL; bool initiateHwudump = false; TARGETING::TargetHandle_t dumpTrgt = NULL; errlHndl_t dumpErrl = NULL; uint32_t dumpErrlActions = 0; { // system scope lock starts ------------------------------------------ // will unlock when going out of scope PRDF_SYSTEM_SCOPELOCK; g_prd_errlHndl = NULL; uint32_t rc = SUCCESS; // clears all the chips saved to stack during last analysis ServiceDataCollector::clearChipStack(); if(( g_initialized == false)&&(NULL ==systemPtr)) { g_prd_errlHndl = noLock_initialize(); if(g_prd_errlHndl != NULL) rc = PRD_NOT_INITIALIZED; } ServiceDataCollector serviceData; STEP_CODE_DATA_STRUCT sdc; sdc.service_data = &serviceData; SYSTEM_DEBUG_CLASS sysdebug; sysdebug.Reinitialize(i_attnList); //Refresh sysdebug with latest Attn data //////////////////////////////////////////////////////////////////////////// // Normalize global attn type (ie 11,12,13,....) to (CHECKSTOP, RECOVERED, // SPECIAL..) //////////////////////////////////////////////////////////////////////////// if ( i_attentionType == INVALID_ATTENTION_TYPE || i_attentionType >= END_ATTENTION_TYPE ) { rc = PRD_INVALID_ATTENTION_TYPE; PRDF_ERR( "PrdMain: Invalid attention type! Global:%x", i_attentionType ); i_attentionType = RECOVERABLE; // This will prevent RAS service problems } // link to the right service Generator ServiceGeneratorClass & serviceGenerator = ServiceGeneratorClass::ThisServiceGenerator(); // Initialize the SDC error log. Required for GenerateSrcPfa() call below. serviceGenerator.createInitialErrl( i_attentionType ); // check for something wrong if ( g_initialized == false || rc != SUCCESS || systemPtr == NULL ) { if(rc == SUCCESS) { rc = PRD_NOT_INITIALIZED; } PRDF_ERR("PrdMain: PRD failed. 
RC=%x",rc ); // we are not going to do an analysis - so fill out the Service Data (serviceData.GetErrorSignature())->setSigId(rc); serviceData.SetCallout(SP_CODE); serviceData.SetCallout( NextLevelSupport_ENUM, MRU_LOW ); serviceData.SetThresholdMaskId(0); // Sets AT_THRESHOLD, DEGRADED, // SERVICE_CALL } else // do the analysis { // flush Cache so that SCR reads access hardware RegDataCache::getCachedRegisters().flush(); serviceData.setPrimaryAttnType(i_attentionType); // Set the time in which PRD handled the error. Timer timeOfError; PlatServices::getCurrentTime( timeOfError ); serviceData.SetTOE( timeOfError ); ServiceDataCollector l_tempSdc = serviceData; l_tempSdc.setPrimaryPass(); sdc.service_data = &l_tempSdc; int32_t analyzeRc = systemPtr->Analyze( sdc, i_attentionType ); if( PRD_SCAN_COMM_REGISTER_ZERO == analyzeRc ) { // So, the first pass has failed. Hence, there are no primary // bits set. We must start second pass to see if there are any //secondary bits set. sdc.service_data = &serviceData; #if !defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) ForceSyncAnalysis( l_tempSdc ); // save SDC till end of primary pass #endif // starting the second pass PRDF_INF( "PRDF::main() No bits found set in first pass," " starting second pass" ); sysdebug.initAttnPendingtatus( ); //for the second pass if( l_tempSdc.isSecondaryErrFound() ) { sdc.service_data->setSecondaryErrFlag(); } analyzeRc = systemPtr->Analyze( sdc, i_attentionType ); // merging capture data of primary pass with capture data of // secondary pass for better FFDC. 
serviceData.GetCaptureData().mergeData( l_tempSdc.GetCaptureData()); #if !defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME) // save SDC till end of secondary pass ForceSyncAnalysis( serviceData ); #endif } else { serviceData = l_tempSdc; sdc.service_data = &serviceData; } // flush Cache to free up the memory RegDataCache::getCachedRegisters().flush(); ScanFacility & l_scanFac = ScanFacility::Access(); //delete all the wrapper register objects since these were created //just for plugin code l_scanFac.ResetPluginRegister(); if(analyzeRc != SUCCESS && g_prd_errlHndl == NULL) { (serviceData.GetErrorSignature())->setErrCode( (uint16_t)analyzeRc ); serviceData.SetCallout(SP_CODE); serviceData.SetCallout( NextLevelSupport_ENUM, MRU_LOW ); serviceData.SetServiceCall(); // We don't want to gard unless we have a good // return code serviceData.Gard(GardAction::NoGard); } } if(g_prd_errlHndl != NULL) { PRDF_INF("PRDTRACE: PrdMain: g_prd_errlHndl != NULL"); PRDF_ADD_PROCEDURE_CALLOUT( g_prd_errlHndl, SRCI_PRIORITY_MED, EPUB_PRC_SP_CODE ); // This is a precautionary step. There is a possibilty that if // severity for g_prd_errlHndl is Predictve and there is only // EPUB_PRC_SP_CODE callout than it will be changed to tracing event. // So adding EPUB_PRC_LVL_SUPP to avoid this. PRDF_ADD_PROCEDURE_CALLOUT( g_prd_errlHndl, SRCI_PRIORITY_LOW, EPUB_PRC_LVL_SUPP ); // This forces any previous errls to be committed g_prd_errlHndl = NULL; // pw 597903 -- Don't GARD if we got a global error. serviceData.Gard(GardAction::NoGard); } g_prd_errlHndl = serviceGenerator.GenerateSrcPfa( i_attentionType, serviceData, initiateHwudump, dumpTrgt, dumpErrl, dumpErrlActions); // Sleep for 20msec to let attention lines settle if we are at threshold. 
if ( (g_prd_errlHndl == NULL) && serviceData.IsAtThreshold() ) { PlatServices::milliSleep( 0, 20 ); } retErrl = g_prd_errlHndl.release(); } // system scope lock ends ------------------------------------------ if ( true == initiateHwudump ) { PlatServices::initiateUnitDump( dumpTrgt, dumpErrl, dumpErrlActions ); } PRDF_EXIT( "PRDF::main()" ); return retErrl; }