/** * @brief Handles MCS Channel fail bits, if they exist. * * @param i_membChip The Centaur chip. * @param i_sc ServiceDataColector. * * @return SUCCESS if MCS channel fail is present and properly * handled, FAIL otherwise. */ int32_t handleMcsChnlCs( ExtensibleChip * i_membChip, STEP_CODE_DATA_STRUCT & i_sc ) { #define PRDF_FUNC "[handleMcsChnlCs] " // We will return FAIL from this function if MCS channel fail bits // are not set. If MCS channel fail bits are set, we will try to analyze // Mcs. If MCS is not analyzed properly, we will return FAIL. // This will trigger rule code to execute alternate resolution. int32_t l_rc = SUCCESS; do { CenMembufDataBundle * mbdb = getMembufDataBundle( i_membChip ); ExtensibleChip * mcsChip = mbdb->getMcsChip(); if( NULL == mcsChip ) { l_rc = FAIL; break; } SCAN_COMM_REGISTER_CLASS * mciFir = mcsChip->getRegister("MCIFIR"); SCAN_COMM_REGISTER_CLASS * mciFirMask = mcsChip->getRegister("MCIFIR_MASK"); l_rc = mciFir->Read(); l_rc |= mciFirMask->Read(); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC"MCIFIR/MCIFIR_MASK read failed for 0x%08x", mcsChip->GetId()); break; } // If any of MCS channel fail bit is set, we will analyze // MCS. It is safe to do hard coded check as channel fail // bits are hard wired and and they can not change without HW // change. // bits 0,1, 6, 8, 9, 22, 23, 40 are channel fail bits. uint64_t chnlCsBitsMask = 0xC2C0030000800000ull; uint64_t mciFirBits = mciFir->GetBitFieldJustified(0, 64); uint64_t mciFirMaskBits = mciFirMask->GetBitFieldJustified(0, 64); if ( mciFirBits & ~mciFirMaskBits & chnlCsBitsMask ) { l_rc = mcsChip->Analyze( i_sc, i_sc.service_data->GetCauseAttentionType() ); if( SUCCESS == l_rc ) break; } l_rc = FAIL; }while( 0 ); return l_rc; #undef PRDF_FUNC } PRDF_PLUGIN_DEFINE( Membuf, handleMcsChnlCs );
/** * @fn MaskMbaCalSecondaryBits * @brief Mask MBACAL secondary Fir bits which may come up because of L4 UE. * @param i_chip The Centaur chip. * @param i_sc ServiceDataColector. * @return SUCCESS. */ int32_t MaskMbaCalSecondaryBits( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & i_sc ) { #define PRDF_FUNC "[MaskMbaCalSecondaryBits ] " int32_t l_rc = SUCCESS; do { CenMembufDataBundle * membdb = getMembufDataBundle( i_chip ); for( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ ) { ExtensibleChip * mbaChip = membdb->getMbaChip(i); if ( NULL == mbaChip ) continue; SCAN_COMM_REGISTER_CLASS * mbaCalFirMaskOr = mbaChip->getRegister("MBACALFIR_MASK_OR"); mbaCalFirMaskOr->SetBit(9); mbaCalFirMaskOr->SetBit(15); l_rc = mbaCalFirMaskOr->Write(); if ( SUCCESS != l_rc ) { // Do not break. Just print error trace and look for // other MBA. PRDF_ERR( PRDF_FUNC"MBACALFIR_MASK_OR write failed" "for 0x%08x", mbaChip->GetId()); } } }while( 0 ); return SUCCESS; #undef PRDF_FUNC } PRDF_PLUGIN_DEFINE( Membuf, MaskMbaCalSecondaryBits );
/**
 * @brief  Checks whether the spare deployed FIR bit is on at either end of
 *         the DMI bus.
 *
 * The Centaur side is checked first (DMIFIR[9]). If that bit is not set, the
 * processor side is checked: IOMCFIR_0 for MCS positions 0-3, IOMCFIR_1 for
 * the others, using bit 9, 17, 25 or 33 depending on the MCS position.
 *
 * @param  i_mcsChip The MCS chip on the bus.
 * @param  i_mbChip  The membuf chip on the bus.
 * @return true if a spare deployed bit is set on either side. false otherwise,
 *         including when an input chip is NULL, the parent processor can not
 *         be found, or a register read fails.
 */
bool isSpareBitOnDMIBus( ExtensibleChip * i_mcsChip, ExtensibleChip * i_mbChip )
{
    bool bitOn = false;

    do
    {
        // If any of these objects is NULL, spare bit should not be on.
        if ( ( NULL == i_mcsChip ) || ( NULL == i_mbChip ) )
            break;

        // Check spare deployed bit on Centaur side.
        SCAN_COMM_REGISTER_CLASS * dmiFir = i_mbChip->getRegister( "DMIFIR" );
        int32_t rc = dmiFir->Read();
        if ( SUCCESS != rc )
        {
            PRDF_ERR( "isSpareBitOnDMIBus() : Failed to read DMIFIR. "
                      "MEMBUF: 0x%08X", getHuid( i_mbChip->GetChipHandle() ) );
            break;
        }

        if ( dmiFir->IsBitSet( 9 ) )
        {
            bitOn = true;
            break;
        }

        // Check spare deployed bit on Proc side.
        TargetHandle_t mcsTgt  = i_mcsChip->GetChipHandle();
        TargetHandle_t procTgt = getConnectedParent( mcsTgt, TYPE_PROC );
        if ( NULL == procTgt )
        {
            // Without a parent processor there is no IOMCFIR to look at.
            PRDF_ERR( "isSpareBitOnDMIBus() : Proc is NULL for "
                      "MCS: 0x%08X", getHuid( mcsTgt ) );
            break;
        }

        ExtensibleChip * procChip =
                        ( ExtensibleChip * )systemPtr->GetChip( procTgt );
        if ( NULL == procChip )
        {
            // Guard the getRegister() call below against a NULL chip.
            PRDF_ERR( "isSpareBitOnDMIBus() : Can not find Proc chip for "
                      "HUID: 0x%08X", getHuid( procTgt ) );
            break;
        }

        uint32_t mcsPos = getTargetPosition( mcsTgt );

        const char * regStr = ( 4 > mcsPos ) ? "IOMCFIR_0" : "IOMCFIR_1";
        SCAN_COMM_REGISTER_CLASS * iomcFir = procChip->getRegister( regStr );
        rc = iomcFir->Read();
        if ( SUCCESS != rc )
        {
            PRDF_ERR( "isSpareBitOnDMIBus() : Failed to read %s. "
                      "MCS: 0x%08X", regStr, getHuid( mcsTgt ) );
            break;
        }

        // Bits 9, 17, 25 and 33 are for spare deployed.
        // Check the bit corresponding to the MCS position.
        uint8_t bitPos = 9 + ( mcsPos % 4 ) * 8;
        if ( iomcFir->IsBitSet( bitPos ) )
        {
            bitOn = true;
        }

    } while (0);

    return bitOn;
}
/**
 * @brief  Plugin to mask the side effects of an RCD parity error.
 *
 * Sets MBAFIR_MASK_OR[2] and MBACALFIR_MASK_OR[2,17] on the MBA, and
 * MBSFIR_MASK_OR[4] on the connected membuf, then writes all three registers.
 *
 * @param  i_mbaChip A Centaur MBA chip.
 * @param  i_sc      The step code data struct (not used here).
 * @return SUCCESS, always. Failures are only traced.
 */
int32_t maskRcdParitySideEffects( ExtensibleChip * i_mbaChip,
                                  STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[maskRcdParitySideEffects] "

    // The membuf chip is reached through the MBA data bundle.
    ExtensibleChip * membuf = getMbaDataBundle( i_mbaChip )->getMembChip();
    if ( NULL == membuf )
    {
        PRDF_ERR( PRDF_FUNC "getMembChip() failed" );
        return SUCCESS;
    }

    // Mask registers for each FIR involved.
    SCAN_COMM_REGISTER_CLASS * mbsMask =
                        membuf->getRegister( "MBSFIR_MASK_OR" );
    SCAN_COMM_REGISTER_CLASS * calMask =
                        i_mbaChip->getRegister( "MBACALFIR_MASK_OR" );
    SCAN_COMM_REGISTER_CLASS * mbaMask =
                        i_mbaChip->getRegister( "MBAFIR_MASK_OR" );

    mbaMask->SetBit( 2 );
    calMask->SetBit( 2 );
    calMask->SetBit( 17 );
    mbsMask->SetBit( 4 );

    // All three writes are attempted even if an earlier one fails.
    int32_t writeRc = mbaMask->Write();
    writeRc |= calMask->Write();
    writeRc |= mbsMask->Write();

    if ( SUCCESS != writeRc )
    {
        PRDF_ERR( PRDF_FUNC "MBAFIR_MASK_OR/MBACALFIR_MASK_OR/MBSFIR_MASK_OR"
                  " write failed for 0x%08x", i_mbaChip->GetId() );
    }

    return SUCCESS;

    #undef PRDF_FUNC
}
/**
 * @brief  Reads the DRAM size field for an MBA from the connected membuf.
 *
 * The value comes from the 2-bit field starting at bit 6 of MBA0_MBAXCR (MBA
 * position 0) or MBA1_MBAXCR (any other position).
 *
 * @param  i_mbaChip An MBA chip.
 * @param  o_size    The DRAM size field. Left at SIZE_2GB if anything fails.
 * @return SUCCESS if the register was read, non-SUCCESS otherwise.
 */
int32_t getDramSize( ExtensibleChip *i_mbaChip, uint8_t & o_size )
{
    #define PRDF_FUNC "[MemUtils::getDramSize] "

    // Value reported when the register can not be read.
    o_size = SIZE_2GB;

    TargetHandle_t mba = i_mbaChip->GetChipHandle();

    ExtensibleChip * membuf = getMbaDataBundle( i_mbaChip )->getMembChip();
    if ( NULL == membuf )
    {
        PRDF_ERR( PRDF_FUNC "getMembChip() failed: MBA=0x%08x",
                  getHuid(mba) );
        return FAIL;
    }

    // Each MBA has its own MBAXCR register on the membuf.
    const uint32_t mbaPos = getTargetPosition( mba );
    const char * regName = "MBA1_MBAXCR";
    if ( 0 == mbaPos )
    {
        regName = "MBA0_MBAXCR";
    }

    SCAN_COMM_REGISTER_CLASS * mbaxcr = membuf->getRegister( regName );

    const int32_t readRc = mbaxcr->Read();
    if ( SUCCESS != readRc )
    {
        PRDF_ERR( PRDF_FUNC "Read() failed on %s. Target=0x%08x",
                  regName, getHuid(mba) );
        return readRc;
    }

    o_size = mbaxcr->GetBitFieldJustified( 6, 2 );

    return readRc;

    #undef PRDF_FUNC
}
int32_t collectCeStats( ExtensibleChip * i_mbaChip, const CenRank & i_rank, MaintSymbols & o_maintStats, CenSymbol & o_chipMark, uint8_t i_thr ) { #define PRDF_FUNC "[MemUtils::collectCeStats] " int32_t o_rc = SUCCESS; o_chipMark = CenSymbol(); // Initially invalid. do { if ( 0 == i_thr ) // Must be non-zero { PRDF_ERR( PRDF_FUNC "i_thr %d is invalid", i_thr ); o_rc = FAIL; break; } TargetHandle_t mbaTrgt = i_mbaChip->GetChipHandle(); CenMbaDataBundle * mbadb = getMbaDataBundle( i_mbaChip ); ExtensibleChip * membufChip = mbadb->getMembChip(); if ( NULL == membufChip ) { PRDF_ERR( PRDF_FUNC "getMembChip() failed" ); o_rc = FAIL; break; } uint8_t mbaPos = getTargetPosition( mbaTrgt ); if ( MAX_MBA_PER_MEMBUF <= mbaPos ) { PRDF_ERR( PRDF_FUNC "mbaPos %d is invalid", mbaPos ); o_rc = FAIL; break; } const bool isX4 = isDramWidthX4(mbaTrgt); // Get the current spares on this rank. CenSymbol sp0, sp1, ecc; o_rc = mssGetSteerMux( mbaTrgt, i_rank, sp0, sp1, ecc ); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed." ); break; } // Use this map to keep track of the total counts per DRAM. DramCountMap dramCounts; const char * reg_str = NULL; SCAN_COMM_REGISTER_CLASS * reg = NULL; for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_MBA; regIdx++ ) { reg_str = mbsCeStatReg[mbaPos][regIdx]; reg = membufChip->getRegister( reg_str ); o_rc = reg->Read(); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str ); break; } uint8_t baseSymbol = SYMBOLS_PER_CE_REG * regIdx; for ( uint8_t i = 0; i < SYMBOLS_PER_CE_REG; i++ ) { uint8_t count = reg->GetBitFieldJustified( (i*8), 8 ); if ( 0 == count ) continue; // nothing to do uint8_t sym = baseSymbol + i; uint8_t dram = symbol2Dram( sym, isX4 ); // Keep track of the total DRAM counts. dramCounts[dram].totalCount += count; // Add any symbols that have exceeded threshold to the list. if ( i_thr <= count ) { // Keep track of the total number of symbols per DRAM that // have exceeded threshold. 
dramCounts[dram].symbolCount++; SymbolData symData; symData.symbol = CenSymbol::fromSymbol( mbaTrgt, i_rank, sym, CEN_SYMBOL::BOTH_SYMBOL_DQS ); if ( !symData.symbol.isValid() ) { PRDF_ERR( PRDF_FUNC "CenSymbol() failed: symbol=%d", sym ); o_rc = FAIL; break; } else { // Check if this symbol is on any of the spares. if ( ( sp0.isValid() && (sp0.getDram() == symData.symbol.getDram()) ) || ( sp1.isValid() && (sp1.getDram() == symData.symbol.getDram()) ) ) { symData.symbol.setDramSpared(); } if ( ecc.isValid() && (ecc.getDram() == symData.symbol.getDram()) ) { symData.symbol.setEccSpared(); } // Add the symbol to the list. symData.count = count; o_maintStats.push_back( symData ); } } } if ( SUCCESS != o_rc ) break; } if ( SUCCESS != o_rc ) break; if ( o_maintStats.empty() ) break; // no need to continue // Sort the list of symbols. std::sort( o_maintStats.begin(), o_maintStats.end(), sortSymDataCount ); // Get the DRAM with the highest count. uint32_t highestDram = 0; uint32_t highestCount = 0; const uint32_t symbolTH = isX4 ? 1 : 2; for ( DramCountMap::iterator it = dramCounts.begin(); it != dramCounts.end(); ++it ) { if ( (symbolTH <= it->second.symbolCount) && (highestCount < it->second.totalCount ) ) { highestDram = it->first; highestCount = it->second.totalCount; } } if ( 0 != highestCount ) { uint8_t sym = dram2Symbol( highestDram, isX4 ); o_chipMark = CenSymbol::fromSymbol( mbaTrgt, i_rank, sym ); // Check if this symbol is on any of the spares. if ( ( sp0.isValid() && (sp0.getDram() == o_chipMark.getDram()) ) || ( sp1.isValid() && (sp1.getDram() == o_chipMark.getDram()) ) ) { o_chipMark.setDramSpared(); } if ( ecc.isValid() && (ecc.getDram() == o_chipMark.getDram()) ) { o_chipMark.setEccSpared(); } } } while(0); if ( SUCCESS != o_rc ) { PRDF_ERR( PRDF_FUNC "Failed: i_mbaChip=0x%08x i_rank=m%ds%d i_thr=%d", i_mbaChip->GetId(), i_rank.getMaster(), i_rank.getSlave(), i_thr ); } return o_rc; #undef PRDF_FUNC }
int32_t chnlCsCleanup( ExtensibleChip *i_mbChip, STEP_CODE_DATA_STRUCT & i_sc ) { #define PRDF_FUNC "[MemUtils::chnlCsCleanup] " int32_t o_rc = SUCCESS; do { if( ( NULL == i_mbChip ) || ( TYPE_MEMBUF != getTargetType( i_mbChip->GetChipHandle() ))) { PRDF_ERR( PRDF_FUNC "Invalid parameters" ); o_rc = FAIL; break; } if (( ! i_sc.service_data->IsUnitCS() ) || (CHECK_STOP == i_sc.service_data->getPrimaryAttnType()) ) break; CenMembufDataBundle * mbdb = getMembufDataBundle( i_mbChip ); if ( !mbdb->iv_doChnlFailCleanup ) break; // Cleanup has already been done. // Set it as SUE generation point. i_sc.service_data->SetFlag( ServiceDataCollector::UERE ); ExtensibleChip * mcsChip = mbdb->getMcsChip(); if ( NULL == mcsChip ) { PRDF_ERR( PRDF_FUNC "MCS chip is NULL for Membuf:0x%08X", i_mbChip->GetId() ); o_rc = FAIL; break; } TargetHandle_t mcs = mcsChip->GetChipHandle(); ExtensibleChip * procChip = NULL; uint8_t pos = getTargetPosition( mcs ); TargetHandle_t proc = getParentChip ( mcs ); if ( NULL == proc ) { PRDF_ERR( PRDF_FUNC "Proc is NULL for Mcs:0x%08X", getHuid( mcs ) ); o_rc = FAIL; break; } procChip = (ExtensibleChip *)systemPtr->GetChip( proc ); if( NULL == procChip ) { PRDF_ERR( PRDF_FUNC "Can not find Proc chip for HUID:0x%08X", getHuid( proc) ); o_rc = FAIL; break; } // This is a cleanup function. If we get any error from scom // operations, we will still continue with cleanup. SCAN_COMM_REGISTER_CLASS * l_tpMask = procChip->getRegister("TP_CHIPLET_FIR_MASK"); o_rc |= l_tpMask->Read(); if ( SUCCESS == o_rc ) { // Bits 5-12 maps to attentions from MCS0-MCS7. l_tpMask->SetBit( 5 + pos ); o_rc |= l_tpMask->Write(); } // Mask attentions from the Centaur const char *iomcFirMask = ( pos < 4 )? "IOMCFIR_0_MASK_OR":"IOMCFIR_1_MASK_OR"; SCAN_COMM_REGISTER_CLASS * iomcMask = procChip->getRegister( iomcFirMask); if ( pos >= 4 ) pos -= 4; // 8 bits are reserved for each Centaur in IOMCFIR. // There are total 4 ( for P system ) centaur supported // in MCS. 
Bits for first centaur starts from bit 8. iomcMask->SetBitFieldJustified( 8+ ( pos*8 ), 8, 0xff); o_rc |= iomcMask->Write(); SCAN_COMM_REGISTER_CLASS * l_tpfirmask = NULL; SCAN_COMM_REGISTER_CLASS * l_nestfirmask = NULL; SCAN_COMM_REGISTER_CLASS * l_memfirmask = NULL; SCAN_COMM_REGISTER_CLASS * l_memspamask = NULL; l_tpfirmask = i_mbChip->getRegister("TP_CHIPLET_FIR_MASK"); l_nestfirmask = i_mbChip->getRegister("NEST_CHIPLET_FIR_MASK"); l_memfirmask = i_mbChip->getRegister("MEM_CHIPLET_FIR_MASK"); l_memspamask = i_mbChip->getRegister("MEM_CHIPLET_SPA_MASK"); l_tpfirmask->setAllBits(); o_rc |= l_tpfirmask->Write(); l_nestfirmask->setAllBits(); o_rc |= l_nestfirmask->Write(); l_memfirmask->setAllBits(); o_rc |= l_memfirmask->Write(); l_memspamask->setAllBits(); o_rc |= l_memspamask->Write(); for ( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ ) { ExtensibleChip * mbaChip = mbdb->getMbaChip( i ); if( NULL != mbaChip ) { TargetHandle_t mba = mbaChip->GetChipHandle(); if ( NULL != mba ) { #if defined(__HOSTBOOT_MODULE) && \ !defined(__HOSTBOOT_RUNTIME) // This is very small platform specific code. So not // creating a separate file for this. int32_t l_rc = mdiaSendEventMsg( mba, MDIA::SKIP_MBA ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC "mdiaSendEventMsg(0x%08x, SKIP_MBA) " "failed", getHuid( mba ) ); o_rc |= l_rc; } #else int32_t l_rc = DEALLOC::mbaGard( mbaChip ); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC "mbaGard failed. HUID: 0x%08x", getHuid( mba ) ); o_rc |= l_rc; } #endif // __HOSTBOOT_MODULE } } } // Clean up complete an is no longer required. mbdb->iv_doChnlFailCleanup = false; } while(0); return o_rc; #undef PRDF_FUNC }
/** * @fn ClearMbaCalSecondaryBits * @brief Clears MBACAL secondary Fir bits which may come up because of MBSFIR * @param i_chip The Centaur chip. * @param i_sc ServiceDataColector. * @return SUCCESS. */ int32_t ClearMbaCalSecondaryBits( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & i_sc ) { #define PRDF_FUNC "[ClearMbaCalSecondaryBits ] " int32_t l_rc = SUCCESS; do { SCAN_COMM_REGISTER_CLASS * mbsFir = i_chip->getRegister("MBSFIR"); SCAN_COMM_REGISTER_CLASS * mbsFirMask = i_chip->getRegister("MBSFIR_MASK"); l_rc = mbsFir->Read(); l_rc |= mbsFirMask->Read(); if ( SUCCESS != l_rc ) { PRDF_ERR( PRDF_FUNC"MBSFIR/MBSFIR_MASK read failed" "for 0x%08x", i_chip->GetId()); break; } CenMembufDataBundle * membdb = getMembufDataBundle( i_chip ); for( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ ) { ExtensibleChip * mbaChip = membdb->getMbaChip(i); if ( NULL == mbaChip ) continue; SCAN_COMM_REGISTER_CLASS * mbaCalFir = mbaChip->getRegister("MBACALFIR"); if( SUCCESS != mbaCalFir->Read() ) { // Do not break. Just print error trace and look for // other MBA. PRDF_ERR( PRDF_FUNC"MBACALFIR read failed" "for 0x%08x", mbaChip->GetId()); continue; } if( !( mbaCalFir->IsBitSet( 10 ) || mbaCalFir->IsBitSet( 14 ) )) continue; SCAN_COMM_REGISTER_CLASS * mbaCalAndFir = mbaChip->getRegister("MBACALFIR_AND"); mbaCalAndFir->setAllBits(); mbaCalAndFir->ClearBit(10); mbaCalAndFir->ClearBit(14); l_rc = mbaCalAndFir->Write(); if ( SUCCESS != l_rc ) { // Do not break. Just print error trace and look for // other MBA. PRDF_ERR( PRDF_FUNC"MBACALFIR_AND write failed" "for 0x%08x", mbaChip->GetId()); } } }while( 0 ); return SUCCESS; #undef PRDF_FUNC } PRDF_PLUGIN_DEFINE( Membuf, ClearMbaCalSecondaryBits );
/**
 * @brief  Clears the secondary MCIFIR[10] and MBIFIR[10] bits, as needed for
 *         a spare deployed attention on a DMI bus.
 *
 * The MCS/membuf pair is resolved from i_chip according to i_busType:
 *  - TYPE_MCS:    the MCS is the child of i_chip at position i_busPos and
 *                 the membuf comes from the MCS data bundle.
 *  - TYPE_MEMBUF: i_chip is the membuf and the MCS comes from the membuf
 *                 data bundle.
 * Nothing is done for any other bus type, or when any chip or target of the
 * pair can not be resolved.
 *
 * @param  i_chip    The chip reporting the attention.
 * @param  i_busType The type of the bus endpoint.
 * @param  i_busPos  The bus position (only used for TYPE_MCS).
 * @return Non-SUCCESS if a register write failed, SUCCESS otherwise.
 */
int32_t cleanupSecondaryFirBits( ExtensibleChip * i_chip,
                                 TYPE i_busType,
                                 uint32_t i_busPos )
{
    ExtensibleChip * mcs    = NULL;
    ExtensibleChip * membuf = NULL;

    if ( TYPE_MCS == i_busType )
    {
        TargetHandle_t mcsTrgt = getConnectedChild( i_chip->GetChipHandle(),
                                                    TYPE_MCS, i_busPos );
        if ( NULL == mcsTrgt ) return SUCCESS;

        mcs = ( ExtensibleChip * )systemPtr->GetChip( mcsTrgt );
        if ( NULL == mcs ) return SUCCESS;

        membuf = getMcsDataBundle( mcs )->getMembChip();
        if ( NULL == membuf ) return SUCCESS;

        if ( NULL == membuf->GetChipHandle() ) return SUCCESS;
    }
    else if ( TYPE_MEMBUF == i_busType )
    {
        if ( NULL == i_chip->GetChipHandle() ) return SUCCESS;

        mcs = getMembufDataBundle( i_chip )->getMcsChip();
        if ( NULL == mcs ) return SUCCESS;

        if ( NULL == mcs->GetChipHandle() ) return SUCCESS;

        membuf = i_chip;
    }
    else
    {
        // Secondary FIR bits only need to be cleaned for a DMI bus.
        return SUCCESS;
    }

    SCAN_COMM_REGISTER_CLASS * mciFirAnd = mcs->getRegister( "MCIFIR_AND" );
    SCAN_COMM_REGISTER_CLASS * mbiFirAnd = membuf->getRegister( "MBIFIR_AND" );

    // All-ones pattern with only bit 10 cleared, for both AND registers.
    mciFirAnd->setAllBits();
    mciFirAnd->ClearBit( 10 );
    mbiFirAnd->setAllBits();
    mbiFirAnd->ClearBit( 10 );

    // Both writes are attempted even if the first one fails.
    int32_t writeRc = mciFirAnd->Write();
    writeRc |= mbiFirAnd->Write();

    if ( SUCCESS != writeRc )
    {
        PRDF_ERR( "[cleanupSecondaryFirBits] Write() failed on "
                  "MCIFIR/MBIFIR: MCS=0x%08x MEMB=0x%08x",
                  mcs->GetId(), membuf->GetId() );
    }

    return writeRc;
}