/** * @brief Handles MCS Channel fail bits, if they exist. * * @param i_membChip The Centaur chip. * @param i_sc ServiceDataColector. * * @return SUCCESS if MCS channel fail is present and properly * handled, FAIL otherwise. */ int32_t handleMcsChnlCs( ExtensibleChip * i_membChip, STEP_CODE_DATA_STRUCT & i_sc ) { #define PRDF_FUNC "[handleMcsChnlCs] " // We will return FAIL from this function if MCS channel fail bits // are not set. If MCS channel fail bits are set, we will try to analyze // Mcs. If MCS is not analyzed properly, we will return FAIL. // This will trigger rule code to execute alternate resolution. int32_t l_rc = SUCCESS; do { CenMembufDataBundle * mbdb = getMembufDataBundle( i_membChip ); ExtensibleChip * mcsChip = mbdb->getMcsChip(); if( NULL == mcsChip ) { l_rc = FAIL; break; } SCAN_COMM_REGISTER_CLASS * mciFir = mcsChip->getRegister("MCIFIR"); SCAN_COMM_REGISTER_CLASS * mciFirMask = mcsChip->getRegister("MCIFIR_MASK"); l_rc = mciFir->Read(); l_rc |= mciFirMask->Read(); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC"MCIFIR/MCIFIR_MASK read failed for 0x%08x", mcsChip->GetId()); break; } // If any of MCS channel fail bit is set, we will analyze // MCS. It is safe to do hard coded check as channel fail // bits are hard wired and and they can not change without HW // change. // bits 0,1, 6, 8, 9, 22, 23, 40 are channel fail bits. uint64_t chnlCsBitsMask = 0xC2C0030000800000ull; uint64_t mciFirBits = mciFir->GetBitFieldJustified(0, 64); uint64_t mciFirMaskBits = mciFirMask->GetBitFieldJustified(0, 64); if ( mciFirBits & ~mciFirMaskBits & chnlCsBitsMask ) { l_rc = mcsChip->Analyze( i_sc, i_sc.service_data->GetCauseAttentionType() ); if( SUCCESS == l_rc ) break; } l_rc = FAIL; }while( 0 ); return l_rc; #undef PRDF_FUNC } PRDF_PLUGIN_DEFINE( Membuf, handleMcsChnlCs );
int32_t chnlCsCleanup( ExtensibleChip *i_mbChip, STEP_CODE_DATA_STRUCT & i_sc ) { #define PRDF_FUNC "[MemUtils::chnlCsCleanup] " int32_t o_rc = SUCCESS; do { if( ( NULL == i_mbChip ) || ( TYPE_MEMBUF != getTargetType( i_mbChip->GetChipHandle() ))) { PRDF_ERR( PRDF_FUNC "Invalid parameters" ); o_rc = FAIL; break; } if (( ! i_sc.service_data->IsUnitCS() ) || (CHECK_STOP == i_sc.service_data->getPrimaryAttnType()) ) break; CenMembufDataBundle * mbdb = getMembufDataBundle( i_mbChip ); if ( !mbdb->iv_doChnlFailCleanup ) break; // Cleanup has already been done. // Set it as SUE generation point. i_sc.service_data->SetFlag( ServiceDataCollector::UERE ); ExtensibleChip * mcsChip = mbdb->getMcsChip(); if ( NULL == mcsChip ) { PRDF_ERR( PRDF_FUNC "MCS chip is NULL for Membuf:0x%08X", i_mbChip->GetId() ); o_rc = FAIL; break; } TargetHandle_t mcs = mcsChip->GetChipHandle(); ExtensibleChip * procChip = NULL; uint8_t pos = getTargetPosition( mcs ); TargetHandle_t proc = getParentChip ( mcs ); if ( NULL == proc ) { PRDF_ERR( PRDF_FUNC "Proc is NULL for Mcs:0x%08X", getHuid( mcs ) ); o_rc = FAIL; break; } procChip = (ExtensibleChip *)systemPtr->GetChip( proc ); if( NULL == procChip ) { PRDF_ERR( PRDF_FUNC "Can not find Proc chip for HUID:0x%08X", getHuid( proc) ); o_rc = FAIL; break; } // This is a cleanup function. If we get any error from scom // operations, we will still continue with cleanup. SCAN_COMM_REGISTER_CLASS * l_tpMask = procChip->getRegister("TP_CHIPLET_FIR_MASK"); o_rc |= l_tpMask->Read(); if ( SUCCESS == o_rc ) { // Bits 5-12 maps to attentions from MCS0-MCS7. l_tpMask->SetBit( 5 + pos ); o_rc |= l_tpMask->Write(); } // Mask attentions from the Centaur const char *iomcFirMask = ( pos < 4 )? "IOMCFIR_0_MASK_OR":"IOMCFIR_1_MASK_OR"; SCAN_COMM_REGISTER_CLASS * iomcMask = procChip->getRegister( iomcFirMask); if ( pos >= 4 ) pos -= 4; // 8 bits are reserved for each Centaur in IOMCFIR. // There are total 4 ( for P system ) centaur supported // in MCS. Bits for first centaur starts from bit 8. iomcMask->SetBitFieldJustified( 8+ ( pos*8 ), 8, 0xff); o_rc |= iomcMask->Write(); SCAN_COMM_REGISTER_CLASS * l_tpfirmask = NULL; SCAN_COMM_REGISTER_CLASS * l_nestfirmask = NULL; SCAN_COMM_REGISTER_CLASS * l_memfirmask = NULL; SCAN_COMM_REGISTER_CLASS * l_memspamask = NULL; l_tpfirmask = i_mbChip->getRegister("TP_CHIPLET_FIR_MASK"); l_nestfirmask = i_mbChip->getRegister("NEST_CHIPLET_FIR_MASK"); l_memfirmask = i_mbChip->getRegister("MEM_CHIPLET_FIR_MASK"); l_memspamask = i_mbChip->getRegister("MEM_CHIPLET_SPA_MASK"); l_tpfirmask->setAllBits(); o_rc |= l_tpfirmask->Write(); l_nestfirmask->setAllBits(); o_rc |= l_nestfirmask->Write(); l_memfirmask->setAllBits(); o_rc |= l_memfirmask->Write(); l_memspamask->setAllBits(); o_rc |= l_memspamask->Write(); for ( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ ) { ExtensibleChip * mbaChip = mbdb->getMbaChip( i ); if( NULL != mbaChip ) { TargetHandle_t mba = mbaChip->GetChipHandle(); if ( NULL != mba ) { #if defined(__HOSTBOOT_MODULE) && \ !defined(__HOSTBOOT_RUNTIME) // This is very small platform specific code. So not // creating a separate file for this. int32_t l_rc = mdiaSendEventMsg( mba, MDIA::SKIP_MBA ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC "mdiaSendEventMsg(0x%08x, SKIP_MBA) " "failed", getHuid( mba ) ); o_rc |= l_rc; } #else int32_t l_rc = DEALLOC::mbaGard( mbaChip ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC "mbaGard failed. HUID: 0x%08x", getHuid( mba ) ); o_rc |= l_rc; } #endif // __HOSTBOOT_MODULE } } } // Clean up complete an is no longer required. mbdb->iv_doChnlFailCleanup = false; } while(0); return o_rc; #undef PRDF_FUNC }
/** * @brief Analysis code that is called before the main analyze() function. * @param i_mbChip A MEMBUF chip. * @param i_sc Step Code Data structure * @param o_analyzed TRUE if analysis has been done on this chip * @return failure or success */ int32_t PreAnalysis( ExtensibleChip * i_mbChip, STEP_CODE_DATA_STRUCT & i_sc, bool & o_analyzed ) { #define PRDF_FUNC "[Membuf::PreAnalysis] " int32_t o_rc = SUCCESS; o_analyzed = false; // Get memory capture data. CaptureData & cd = i_sc.service_data->GetCaptureData(); CenMembufDataBundle * mbdb = getMembufDataBundle( i_mbChip ); ExtensibleChip * mcsChip = mbdb->getMcsChip(); if ( NULL != mcsChip ) { mcsChip->CaptureErrorData( cd, Util::hashString("FirRegs") ); mcsChip->CaptureErrorData( cd, Util::hashString("CerrRegs") ); CenMbaCaptureData::addMemChipletFirRegs( i_mbChip, cd ); } // Check for a Centaur Checkstop do { // Skip if we're already analyzing a unit checkstop if ( i_sc.service_data->GetFlag(ServiceDataCollector::UNIT_CS) ) break; // Skip if we're analyzing a special attention. // This is a required for a rare scenario when Centaur CS bit comes // up after attention has called PRD and PRD was still at start of // analysis. if ( SPECIAL == i_sc.service_data->GetAttentionType() ) break; // MCIFIR[31] is not always reliable if the unit CS originated on the // Centaur. This is due to packets not getting forwarded to the MCS. // Instead, check for non-zero GLOBAL_CS_FIR. SCAN_COMM_REGISTER_CLASS * fir = i_mbChip->getRegister("GLOBAL_CS_FIR"); o_rc = fir->Read(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC"Failed to read GLOBAL_CS_FIR on 0x%08x", i_mbChip->GetId() ); break; } if ( fir->BitStringIsZero() ) break; // No unit checkstop // Set Unit checkstop flag i_sc.service_data->SetFlag(ServiceDataCollector::UNIT_CS); i_sc.service_data->SetThresholdMaskId(0); // Set the cause attention type i_sc.service_data->SetCauseAttentionType(UNIT_CS); } while (0); return o_rc; #undef PRDF_FUNC }