Beispiel #1
0
/**
 * @brief Handles MCS Channel fail bits, if they exist.
 *
 * @param  i_membChip   The Centaur chip.
 * @param  i_sc         ServiceDataColector.
 *
 * @return SUCCESS if MCS channel fail is present and properly
 *         handled, FAIL otherwise.
 */
int32_t handleMcsChnlCs( ExtensibleChip * i_membChip,
                    STEP_CODE_DATA_STRUCT & i_sc  )
{
    #define PRDF_FUNC "[handleMcsChnlCs] "

    // We will return FAIL from this function if MCS channel fail  bits
    // are not set. If MCS channel fail bits are set, we will try to analyze
    // Mcs. If MCS is not analyzed properly, we will return FAIL.
    // This will trigger rule code to execute alternate resolution.

    int32_t l_rc = SUCCESS;
    do
    {
        CenMembufDataBundle * mbdb = getMembufDataBundle( i_membChip );
        ExtensibleChip * mcsChip =    mbdb->getMcsChip();
        if( NULL == mcsChip )
        {
            l_rc = FAIL;
            break;
        }

        SCAN_COMM_REGISTER_CLASS * mciFir = mcsChip->getRegister("MCIFIR");
        SCAN_COMM_REGISTER_CLASS * mciFirMask =
                                        mcsChip->getRegister("MCIFIR_MASK");

        l_rc = mciFir->Read();
        l_rc |= mciFirMask->Read();

        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC"MCIFIR/MCIFIR_MASK read failed for 0x%08x",
                      mcsChip->GetId());
            break;
        }

        // If any of MCS channel fail bit is set, we will analyze
        // MCS. It is safe to do hard coded check as channel fail
        // bits are hard wired and and they can not change without HW
        // change.
        // bits 0,1, 6, 8, 9, 22, 23, 40 are channel fail bits.
        uint64_t chnlCsBitsMask = 0xC2C0030000800000ull;
        uint64_t mciFirBits     = mciFir->GetBitFieldJustified(0, 64);
        uint64_t mciFirMaskBits = mciFirMask->GetBitFieldJustified(0, 64);

        if ( mciFirBits & ~mciFirMaskBits & chnlCsBitsMask )
        {
            l_rc = mcsChip->Analyze( i_sc,
                        i_sc.service_data->GetCauseAttentionType() );

            if( SUCCESS == l_rc ) break;
        }

        l_rc = FAIL;

    }while( 0 );

    return l_rc;
    #undef PRDF_FUNC

} PRDF_PLUGIN_DEFINE( Membuf, handleMcsChnlCs );
Beispiel #2
0
/**
 * @fn MaskMbaCalSecondaryBits
 * @brief Mask MBACAL secondary Fir bits which may come up because of L4 UE.
 * @param  i_chip       The Centaur chip.
 * @param  i_sc         ServiceDataColector.
 * @return SUCCESS.
 */
int32_t MaskMbaCalSecondaryBits( ExtensibleChip * i_chip,
                                 STEP_CODE_DATA_STRUCT & i_sc  )
{
    #define PRDF_FUNC "[MaskMbaCalSecondaryBits ] "
    int32_t l_rc = SUCCESS;

    do
    {
        CenMembufDataBundle * membdb = getMembufDataBundle( i_chip );

        for( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ )
        {
            ExtensibleChip * mbaChip = membdb->getMbaChip(i);
            if ( NULL == mbaChip ) continue;

            SCAN_COMM_REGISTER_CLASS * mbaCalFirMaskOr =
                                mbaChip->getRegister("MBACALFIR_MASK_OR");

            mbaCalFirMaskOr->SetBit(9);
            mbaCalFirMaskOr->SetBit(15);
            l_rc = mbaCalFirMaskOr->Write();
            if ( SUCCESS != l_rc )
            {
                // Do not break. Just print error trace and look for
                // other MBA.
                PRDF_ERR( PRDF_FUNC"MBACALFIR_MASK_OR write failed"
                         "for 0x%08x", mbaChip->GetId());
            }
        }
    }while( 0 );

    return SUCCESS;
    #undef PRDF_FUNC

} PRDF_PLUGIN_DEFINE( Membuf, MaskMbaCalSecondaryBits );
Beispiel #3
0
bool isSpareBitOnDMIBus( ExtensibleChip * i_mcsChip, ExtensibleChip * i_mbChip )
{
    bool bitOn = false;

    do
    {
        // If any of these object is NULL, spare bit should not be on.
        if ( ( NULL == i_mcsChip ) || ( NULL == i_mbChip ))
            break;

        // check spare deployed bit on Centaur side
        SCAN_COMM_REGISTER_CLASS * dmiFir = i_mbChip->getRegister( "DMIFIR" );
        int32_t rc = dmiFir->Read();
        if ( SUCCESS != rc )
        {
            PRDF_ERR("isSpareBitOnDMIBus() : Failed to read DMIFIR."
                      "MEMBUF: 0x%08X", getHuid( i_mbChip->GetChipHandle()) );
            break;
        }
        if ( dmiFir->IsBitSet( 9 ))
        {
            bitOn = true;
            break;
        }

        // check spare deployed bit on Proc side
        TargetHandle_t mcsTgt = i_mcsChip->GetChipHandle();
        TargetHandle_t procTgt = getConnectedParent( mcsTgt, TYPE_PROC );
        ExtensibleChip * procChip =
                        ( ExtensibleChip * )systemPtr->GetChip( procTgt );

        uint32_t mcsPos = getTargetPosition( mcsTgt );

        const char * regStr = ( 4 > mcsPos) ? "IOMCFIR_0" : "IOMCFIR_1";
        SCAN_COMM_REGISTER_CLASS * iomcFir = procChip->getRegister( regStr );
        rc = iomcFir->Read();
        if ( SUCCESS != rc )
        {
            PRDF_ERR("isSpareBitOnDMIBus() : Failed to read %s."
                      "MCS: 0x%08X", regStr, getHuid(mcsTgt) );
            break;
        }
        // Bit 9, 17, 25 and 33 are for spare deployed.
        // Check bit corrosponding to MCS position
        uint8_t bitPos = 9 + ( mcsPos % 4 ) *8;
        if ( iomcFir->IsBitSet(bitPos))
        {
            bitOn = true;
        }

    }while(0);

    return bitOn;
}
Beispiel #4
0
/**
 * @brief  Plugin to mask the side effects of an RCD parity error
 * @param  i_mbaChip A Centaur MBA chip.
 * @param  i_sc      The step code data struct.
 * @return SUCCESS
 */
int32_t maskRcdParitySideEffects( ExtensibleChip * i_mbaChip,
                                    STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[maskRcdParitySideEffects] "

    int32_t l_rc = SUCCESS;

    do
    {
        //use a data bundle to get the membuf chip
        CenMbaDataBundle * mbadb = getMbaDataBundle( i_mbaChip );
        ExtensibleChip * membChip = mbadb->getMembChip();
        if (NULL == membChip)
        {
            PRDF_ERR(PRDF_FUNC "getMembChip() failed");
            break;
        }

        //get the masks for each FIR
        SCAN_COMM_REGISTER_CLASS * mbsFirMaskOr =
            membChip->getRegister("MBSFIR_MASK_OR");
        SCAN_COMM_REGISTER_CLASS * mbaCalMaskOr =
            i_mbaChip->getRegister("MBACALFIR_MASK_OR");
        SCAN_COMM_REGISTER_CLASS * mbaFirMaskOr =
            i_mbaChip->getRegister("MBAFIR_MASK_OR");

        mbaFirMaskOr->SetBit(2);
        mbaCalMaskOr->SetBit(2);
        mbaCalMaskOr->SetBit(17);
        mbsFirMaskOr->SetBit(4);

        l_rc =  mbaFirMaskOr->Write();
        l_rc |= mbaCalMaskOr->Write();
        l_rc |= mbsFirMaskOr->Write();

        if (SUCCESS != l_rc)
        {
            PRDF_ERR(PRDF_FUNC "MBAFIR_MASK_OR/MBACALFIR_MASK_OR/MBSFIR_MASK_OR"
                    " write failed for 0x%08x", i_mbaChip->GetId());
            break;
        }
    }while(0);

    return SUCCESS;
    #undef PRDF_FUNC
}
Beispiel #5
0
int32_t getDramSize( ExtensibleChip *i_mbaChip, uint8_t & o_size )
{
    #define PRDF_FUNC "[MemUtils::getDramSize] "

    int32_t o_rc = SUCCESS;
    o_size = SIZE_2GB;

    do
    {
        TargetHandle_t mbaTrgt = i_mbaChip->GetChipHandle();
        CenMbaDataBundle * mbadb = getMbaDataBundle( i_mbaChip );
        ExtensibleChip * membufChip = mbadb->getMembChip();
        if ( NULL == membufChip )
        {
            PRDF_ERR( PRDF_FUNC "getMembChip() failed: MBA=0x%08x",
                      getHuid(mbaTrgt) );
            o_rc = FAIL; break;
        }

        uint32_t pos = getTargetPosition(mbaTrgt);
        const char * reg_str = (0 == pos) ? "MBA0_MBAXCR" : "MBA1_MBAXCR";

        SCAN_COMM_REGISTER_CLASS * reg = membufChip->getRegister( reg_str );
        o_rc = reg->Read();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "Read() failed on %s. Target=0x%08x",
                      reg_str, getHuid(mbaTrgt) );
            break;
        }
        o_size = reg->GetBitFieldJustified( 6, 2 );

    } while(0);

    return o_rc;

    #undef PRDF_FUNC
}
Beispiel #6
0
int32_t collectCeStats( ExtensibleChip * i_mbaChip, const CenRank & i_rank,
                        MaintSymbols & o_maintStats, CenSymbol & o_chipMark,
                        uint8_t i_thr )
{
    #define PRDF_FUNC "[MemUtils::collectCeStats] "

    int32_t o_rc = SUCCESS;

    o_chipMark = CenSymbol(); // Initially invalid.

    do
    {
        if ( 0 == i_thr ) // Must be non-zero
        {
            PRDF_ERR( PRDF_FUNC "i_thr %d is invalid", i_thr );
            o_rc = FAIL; break;
        }

        TargetHandle_t mbaTrgt = i_mbaChip->GetChipHandle();
        CenMbaDataBundle * mbadb = getMbaDataBundle( i_mbaChip );
        ExtensibleChip * membufChip = mbadb->getMembChip();
        if ( NULL == membufChip )
        {
            PRDF_ERR( PRDF_FUNC "getMembChip() failed" );
            o_rc = FAIL; break;
        }

        uint8_t mbaPos = getTargetPosition( mbaTrgt );
        if ( MAX_MBA_PER_MEMBUF <= mbaPos )
        {
            PRDF_ERR( PRDF_FUNC "mbaPos %d is invalid", mbaPos );
            o_rc = FAIL; break;
        }

        const bool isX4 = isDramWidthX4(mbaTrgt);

        // Get the current spares on this rank.
        CenSymbol sp0, sp1, ecc;
        o_rc = mssGetSteerMux( mbaTrgt, i_rank, sp0, sp1, ecc );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed." );
            break;
        }

        // Use this map to keep track of the total counts per DRAM.
        DramCountMap dramCounts;

        const char * reg_str = NULL;
        SCAN_COMM_REGISTER_CLASS * reg = NULL;

        for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_MBA; regIdx++ )
        {
            reg_str = mbsCeStatReg[mbaPos][regIdx];
            reg     = membufChip->getRegister( reg_str );

            o_rc = reg->Read();
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str );
                break;
            }

            uint8_t baseSymbol = SYMBOLS_PER_CE_REG * regIdx;

            for ( uint8_t i = 0; i < SYMBOLS_PER_CE_REG; i++ )
            {
                uint8_t count = reg->GetBitFieldJustified( (i*8), 8 );

                if ( 0 == count ) continue; // nothing to do

                uint8_t sym  = baseSymbol + i;
                uint8_t dram = symbol2Dram( sym, isX4 );

                // Keep track of the total DRAM counts.
                dramCounts[dram].totalCount += count;

                // Add any symbols that have exceeded threshold to the list.
                if ( i_thr <= count )
                {
                    // Keep track of the total number of symbols per DRAM that
                    // have exceeded threshold.
                    dramCounts[dram].symbolCount++;

                    SymbolData symData;
                    symData.symbol = CenSymbol::fromSymbol( mbaTrgt, i_rank,
                                            sym, CEN_SYMBOL::BOTH_SYMBOL_DQS );
                    if ( !symData.symbol.isValid() )
                    {
                        PRDF_ERR( PRDF_FUNC "CenSymbol() failed: symbol=%d",
                                  sym );
                        o_rc = FAIL;
                        break;
                    }
                    else
                    {
                        // Check if this symbol is on any of the spares.
                        if ( ( sp0.isValid() &&
                               (sp0.getDram() == symData.symbol.getDram()) ) ||
                             ( sp1.isValid() &&
                               (sp1.getDram() == symData.symbol.getDram()) ) )
                        {
                            symData.symbol.setDramSpared();
                        }
                        if ( ecc.isValid() &&
                             (ecc.getDram() == symData.symbol.getDram()) )
                        {
                            symData.symbol.setEccSpared();
                        }

                        // Add the symbol to the list.
                        symData.count = count;
                        o_maintStats.push_back( symData );
                    }
                }
            }
            if ( SUCCESS != o_rc ) break;
        }
        if ( SUCCESS != o_rc ) break;

        if ( o_maintStats.empty() ) break; // no need to continue

        // Sort the list of symbols.
        std::sort( o_maintStats.begin(), o_maintStats.end(), sortSymDataCount );

        // Get the DRAM with the highest count.
        uint32_t highestDram  = 0;
        uint32_t highestCount = 0;
        const uint32_t symbolTH = isX4 ? 1 : 2;
        for ( DramCountMap::iterator it = dramCounts.begin();
              it != dramCounts.end(); ++it )
        {
            if ( (symbolTH     <= it->second.symbolCount) &&
                 (highestCount <  it->second.totalCount ) )
            {
                highestDram  = it->first;
                highestCount = it->second.totalCount;
            }
        }

        if ( 0 != highestCount )
        {
            uint8_t sym = dram2Symbol( highestDram, isX4 );
            o_chipMark  = CenSymbol::fromSymbol( mbaTrgt, i_rank, sym );

            // Check if this symbol is on any of the spares.
            if ( ( sp0.isValid() && (sp0.getDram() == o_chipMark.getDram()) ) ||
                 ( sp1.isValid() && (sp1.getDram() == o_chipMark.getDram()) ) )
            {
                o_chipMark.setDramSpared();
            }
            if ( ecc.isValid() && (ecc.getDram() == o_chipMark.getDram()) )
            {
                o_chipMark.setEccSpared();
            }
        }

    } while(0);

    if ( SUCCESS != o_rc )
    {
        PRDF_ERR( PRDF_FUNC "Failed: i_mbaChip=0x%08x i_rank=m%ds%d i_thr=%d",
                  i_mbaChip->GetId(), i_rank.getMaster(), i_rank.getSlave(),
                  i_thr );
    }

    return o_rc;

    #undef PRDF_FUNC
}
Beispiel #7
0
int32_t chnlCsCleanup( ExtensibleChip *i_mbChip,
                       STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[MemUtils::chnlCsCleanup] "

    int32_t o_rc = SUCCESS;

    do
    {
        if( (  NULL == i_mbChip ) ||
            ( TYPE_MEMBUF != getTargetType( i_mbChip->GetChipHandle() )))
        {
            PRDF_ERR( PRDF_FUNC "Invalid parameters" );
            o_rc = FAIL; break;
        }

        if (( ! i_sc.service_data->IsUnitCS() ) ||
              (CHECK_STOP == i_sc.service_data->getPrimaryAttnType()) )
            break;

        CenMembufDataBundle * mbdb = getMembufDataBundle( i_mbChip );
        if ( !mbdb->iv_doChnlFailCleanup )
            break; // Cleanup has already been done.

        // Set it as SUE generation point.
        i_sc.service_data->SetFlag( ServiceDataCollector::UERE );

        ExtensibleChip * mcsChip = mbdb->getMcsChip();
        if ( NULL == mcsChip )
        {
            PRDF_ERR( PRDF_FUNC "MCS chip is NULL for Membuf:0x%08X",
                      i_mbChip->GetId() );
            o_rc = FAIL; break;
        }

        TargetHandle_t mcs = mcsChip->GetChipHandle();
        ExtensibleChip * procChip = NULL;
        uint8_t pos = getTargetPosition( mcs );
        TargetHandle_t proc = getParentChip ( mcs );

        if ( NULL == proc )
        {
            PRDF_ERR( PRDF_FUNC "Proc is NULL for Mcs:0x%08X", getHuid( mcs ) );
            o_rc = FAIL; break;
        }

        procChip = (ExtensibleChip *)systemPtr->GetChip( proc );

        if( NULL == procChip )
        {
            PRDF_ERR( PRDF_FUNC "Can not find Proc chip for HUID:0x%08X",
                      getHuid( proc) );
            o_rc = FAIL; break;
        }

        // This is a cleanup function. If we get any error from scom
        // operations, we will still continue with cleanup.
        SCAN_COMM_REGISTER_CLASS * l_tpMask =
              procChip->getRegister("TP_CHIPLET_FIR_MASK");
        o_rc |= l_tpMask->Read();
        if ( SUCCESS == o_rc )
        {
            // Bits 5-12 maps to attentions from MCS0-MCS7.
            l_tpMask->SetBit( 5 + pos );
            o_rc |= l_tpMask->Write();
        }

        // Mask attentions from the Centaur
        const char *iomcFirMask = ( pos < 4 )?
                                  "IOMCFIR_0_MASK_OR":"IOMCFIR_1_MASK_OR";

        SCAN_COMM_REGISTER_CLASS * iomcMask =
                                 procChip->getRegister( iomcFirMask);
        if ( pos >= 4 ) pos -= 4;

        // 8 bits are reserved for each Centaur in IOMCFIR.
        // There are total 4 ( for P system ) centaur supported
        // in MCS. Bits for first centaur starts from bit 8.

        iomcMask->SetBitFieldJustified( 8+ ( pos*8 ), 8, 0xff);

        o_rc |= iomcMask->Write();

        SCAN_COMM_REGISTER_CLASS * l_tpfirmask   = NULL;
        SCAN_COMM_REGISTER_CLASS * l_nestfirmask = NULL;
        SCAN_COMM_REGISTER_CLASS * l_memfirmask  = NULL;
        SCAN_COMM_REGISTER_CLASS * l_memspamask  = NULL;

        l_tpfirmask   = i_mbChip->getRegister("TP_CHIPLET_FIR_MASK");
        l_nestfirmask = i_mbChip->getRegister("NEST_CHIPLET_FIR_MASK");
        l_memfirmask  = i_mbChip->getRegister("MEM_CHIPLET_FIR_MASK");
        l_memspamask  = i_mbChip->getRegister("MEM_CHIPLET_SPA_MASK");

        l_tpfirmask->setAllBits();   o_rc |= l_tpfirmask->Write();
        l_nestfirmask->setAllBits(); o_rc |= l_nestfirmask->Write();
        l_memfirmask->setAllBits();  o_rc |= l_memfirmask->Write();
        l_memspamask->setAllBits();  o_rc |= l_memspamask->Write();


        for ( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ )
        {
            ExtensibleChip * mbaChip = mbdb->getMbaChip( i );
            if( NULL != mbaChip )
            {
                TargetHandle_t mba = mbaChip->GetChipHandle();
                if ( NULL != mba )
                {
                    #if  defined(__HOSTBOOT_MODULE) && \
                        !defined(__HOSTBOOT_RUNTIME)
                    // This is very small platform specific code. So not
                    // creating a separate file for this.
                    int32_t l_rc = mdiaSendEventMsg( mba, MDIA::SKIP_MBA );
                    if ( SUCCESS != l_rc )
                    {
                        PRDF_ERR( PRDF_FUNC "mdiaSendEventMsg(0x%08x, SKIP_MBA) "
                                  "failed", getHuid( mba ) );
                        o_rc |= l_rc;
                    }
                    #else
                    int32_t l_rc = DEALLOC::mbaGard( mbaChip  );
                    if ( SUCCESS != l_rc )
                    {
                        PRDF_ERR( PRDF_FUNC "mbaGard failed. HUID: 0x%08x",
                                  getHuid( mba ) );
                        o_rc |= l_rc;
                    }
                    #endif // __HOSTBOOT_MODULE
                }
            }
        }

        // Clean up complete an is no longer required.
        mbdb->iv_doChnlFailCleanup = false;

    } while(0);

    return o_rc;

    #undef PRDF_FUNC
}
void addMemChipletFirRegs( ExtensibleChip * i_membChip, CaptureData & io_cd )
{
    #define PRDF_FUNC "[CenMbaCaptureData::addMemChipletFirRegs] "

    int32_t l_rc = SUCCESS;

    do
    {
        if ( NULL == i_membChip )
        {
            PRDF_ERR( PRDF_FUNC "Given target is NULL" );
            break;
        }

        if ( TYPE_MEMBUF != getTargetType(i_membChip->GetChipHandle()) )
        {
            PRDF_ERR( PRDF_FUNC "Invalid target type: i_membChip=0x%08x",
                      i_membChip->GetId() );
            break;
        }

        SCAN_COMM_REGISTER_CLASS * cs_global, * re_global, * spa_global;
        cs_global  = i_membChip->getRegister("GLOBAL_CS_FIR");
        re_global  = i_membChip->getRegister("GLOBAL_RE_FIR");
        spa_global = i_membChip->getRegister("GLOBAL_SPA");
        l_rc  = cs_global->Read() | re_global->Read() | spa_global->Read();
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC "Failed to read a GLOBAL register on "
                      "0x%08x", i_membChip->GetId() );
            break;
        }

        // If global bit 3 is not on, can't scom mem chiplets or mba's
        if( ! (cs_global->IsBitSet(3) ||
               re_global->IsBitSet(3) ||
               spa_global->IsBitSet(3)) )
        {
            break;
        }

        i_membChip->CaptureErrorData(io_cd,
                                     Util::hashString("MemChipletRegs"));

        CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip );

        for ( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ )
        {
            ExtensibleChip * mbaChip = membdb->getMbaChip(i);
            if ( NULL == mbaChip )
            {
                PRDF_ERR( PRDF_FUNC "MEM_CHIPLET registers indicated an "
                          "attention but no chip found: i_membChip=0x%08x "
                          "i=%d", i_membChip->GetId(), i );
                continue;
            }

            mbaChip->CaptureErrorData(io_cd,
                                      Util::hashString("MemChipletRegs") );
        }

    } while (0);

    #undef PRDF_FUNC
}
Beispiel #9
0
/**
 * @fn ClearMbaCalSecondaryBits
 * @brief Clears MBACAL secondary Fir bits which may come up because of MBSFIR
 * @param  i_chip       The Centaur chip.
 * @param  i_sc         ServiceDataColector.
 * @return SUCCESS.

 */
int32_t ClearMbaCalSecondaryBits( ExtensibleChip * i_chip,
                                  STEP_CODE_DATA_STRUCT & i_sc  )
{
    #define PRDF_FUNC "[ClearMbaCalSecondaryBits ] "
    int32_t l_rc = SUCCESS;

    do
    {
        SCAN_COMM_REGISTER_CLASS * mbsFir = i_chip->getRegister("MBSFIR");
        SCAN_COMM_REGISTER_CLASS * mbsFirMask =
                                        i_chip->getRegister("MBSFIR_MASK");
        l_rc = mbsFir->Read();
        l_rc |= mbsFirMask->Read();
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC"MBSFIR/MBSFIR_MASK read failed"
                     "for 0x%08x", i_chip->GetId());
            break;
        }

        CenMembufDataBundle * membdb = getMembufDataBundle( i_chip );

        for( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ )
        {
            ExtensibleChip * mbaChip = membdb->getMbaChip(i);
            if ( NULL == mbaChip ) continue;

            SCAN_COMM_REGISTER_CLASS * mbaCalFir =
                                mbaChip->getRegister("MBACALFIR");

            if( SUCCESS != mbaCalFir->Read() )
            {
                // Do not break. Just print error trace and look for
                // other MBA.
                PRDF_ERR( PRDF_FUNC"MBACALFIR read failed"
                         "for 0x%08x", mbaChip->GetId());
                continue;
            }

            if( !( mbaCalFir->IsBitSet( 10 ) || mbaCalFir->IsBitSet( 14 ) ))
                continue;

            SCAN_COMM_REGISTER_CLASS * mbaCalAndFir =
                                mbaChip->getRegister("MBACALFIR_AND");

            mbaCalAndFir->setAllBits();

            mbaCalAndFir->ClearBit(10);
            mbaCalAndFir->ClearBit(14);

            l_rc = mbaCalAndFir->Write();
            if ( SUCCESS != l_rc )
            {
                // Do not break. Just print error trace and look for
                // other MBA.
                PRDF_ERR( PRDF_FUNC"MBACALFIR_AND write failed"
                              "for 0x%08x", mbaChip->GetId());
            }
        }

    }while( 0 );

    return SUCCESS;
    #undef PRDF_FUNC

} PRDF_PLUGIN_DEFINE( Membuf, ClearMbaCalSecondaryBits );
Beispiel #10
0
/**
 * @brief  MBSECCFIR[19] - Fetch UE.
 * @param  i_membChip A Centaur chip.
 * @param  i_sc       The step code data struct.
 * @param  i_mbaPos   The MBA position.
 * @return SUCCESS
 */
int32_t AnalyzeFetchUe( ExtensibleChip * i_membChip,
                        STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_mbaPos )
{
    #define PRDF_FUNC "[AnalyzeFetchUe] "

    int32_t l_rc = SUCCESS;

    ExtensibleChip * mbaChip = NULL;

    do
    {
        // All memory UEs should be customer viewable. Normally, this would be
        // done by setting the threshold to 1, but we do not want to mask UEs
        // on the first occurrence.
        i_sc.service_data->SetServiceCall();

        CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip );
        mbaChip = membdb->getMbaChip( i_mbaPos );
        if ( NULL == mbaChip )
        {
            PRDF_ERR( PRDF_FUNC"getMbaChip() returned NULL" );
            l_rc = FAIL; break;
        }

        CenAddr addr;
        l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_UE_ADDR, addr );
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC"getCenReadAddr() failed" );
            break;
        }
        CenRank rank = addr.getRank();

        // Add address to UE table.
        CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip );
        mbadb->iv_ueTable.addEntry( UE_TABLE::FETCH_UE, addr );

        // Callout the rank.
        MemoryMru memmru ( mbaChip->GetChipHandle(), rank,
                           MemoryMruData::CALLOUT_RANK );
        i_sc.service_data->SetCallout( memmru );

        // Add a TPS request to the TD queue and ban any further TPS requests
        // for this rank.
        l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank,
                                               CenMbaTdCtlrCommon::TPS_EVENT,
                                               true );
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC"handleTdEvent() failed: rank=m%ds%d",
                      rank.getMaster(), rank.getSlave() );
            // We are not adding break here as we still want to do lmbGard
            // If you want to add any code after this which depends on result
            // of handleTdEvent result, add the code judicially.
        }

        #ifndef __HOSTBOOT_MODULE
        // Send lmb gard message to PHYP.
        int32_t lmbRc =  DEALLOC::lmbGard( mbaChip, addr );
        if ( SUCCESS != lmbRc )
        {
            PRDF_ERR( PRDF_FUNC"lmbGard() failed" );
            l_rc = lmbRc; break;
        }
        #endif

    } while (0);

    // Add ECC capture data for FFDC.
    if ( NULL != mbaChip )
        CenMbaCaptureData::addMemEccData( mbaChip, i_sc );

    if ( SUCCESS != l_rc )
    {
        PRDF_ERR( PRDF_FUNC"Failed: i_membChip=0x%08x i_mbaPos=%d",
                  i_membChip->GetId(), i_mbaPos );
        CalloutUtil::defaultError( i_sc );
    }

    return SUCCESS; // Intentionally return SUCCESS for this plugin

    #undef PRDF_FUNC
}
Beispiel #11
0
/**
 * @brief  Fetch Retry CE / Prefetch UE Errors.
 * @param  i_membChip A Centaur chip.
 * @param  i_sc         The step code data struct.
 * @param  i_mbaPos     The MBA position.
 * @param  i_isRceError True for RCE error false otherwise.
 * @return SUCCESS
 */
int32_t AnalyzeFetchRcePue( ExtensibleChip * i_membChip,
                            STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_mbaPos,
                            bool i_isRceError )
{
    #define PRDF_FUNC "[AnalyzeFetchRcePue] "

    int32_t l_rc = SUCCESS;

    ExtensibleChip * mbaChip = NULL;

    do
    {
        CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip );
        mbaChip = membdb->getMbaChip( i_mbaPos );
        if ( NULL == mbaChip )
        {
            PRDF_ERR( PRDF_FUNC"getMbaChip() returned NULL" );
            l_rc = FAIL; break;
        }

        CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip );

        CenAddr addr;
        if ( i_isRceError )
            l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_RCE_ADDR, addr );
        else
            l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_UE_ADDR, addr );

        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC"getCenReadAddr() failed" );
            break;
        }
        CenRank rank = addr.getRank();

        // Callout the rank.
        MemoryMru memmru ( mbaChip->GetChipHandle(), rank,
                           MemoryMruData::CALLOUT_RANK );
        i_sc.service_data->SetCallout( memmru );

        // Add an entry to the RCE table.
        if ( mbadb->iv_rceTable.addEntry(rank, i_sc) )
        {
            // Add a TPS request to the queue TD queue.
            l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank,
                                             CenMbaTdCtlrCommon::TPS_EVENT );
            if ( SUCCESS != l_rc )
            {
                PRDF_ERR( PRDF_FUNC"handleTdEvent() failed." );
                break;
            }
        }

    } while (0);

    // Add ECC capture data for FFDC.
    if ( NULL != mbaChip )
        CenMbaCaptureData::addMemEccData( mbaChip, i_sc );

    if ( SUCCESS != l_rc )
    {
        PRDF_ERR( PRDF_FUNC"Failed: i_membChip=0x%08x i_mbaPos=%d "
                  "i_isRceError=%c", i_membChip->GetId(), i_mbaPos,
                  i_isRceError ? 'T' : 'F' );
        CalloutUtil::defaultError( i_sc );
    }

    return SUCCESS; // Intentionally return SUCCESS for this plugin

    #undef PRDF_FUNC
}
Beispiel #12
0
/**
 * @brief  MBSECCFIR[16] - Fetch New CE (NCE).
 * @param  i_membChip A Centaur chip.
 * @param  i_sc       The step code data struct.
 * @param  i_mbaPos   The MBA position.
 * @return SUCCESS
 */
int32_t AnalyzeFetchNce( ExtensibleChip * i_membChip,
                         STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_mbaPos )
{
    #define PRDF_FUNC "[AnalyzeFetchNce] "

    int32_t l_rc = SUCCESS;

    ExtensibleChip * mbaChip = NULL;

    do
    {
        CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip );
        mbaChip = membdb->getMbaChip( i_mbaPos );
        if ( NULL == mbaChip )
        {
            PRDF_ERR( PRDF_FUNC"getMbaChip() returned NULL" );
            l_rc = FAIL; break;
        }
        TargetHandle_t mbaTrgt = mbaChip->GetChipHandle();

        CenAddr addr;
        l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_NCE_ADDR, addr );
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC"getCenReadAddr() failed" );
            break;
        }
        CenRank rank = addr.getRank();

        if ( 0x20 > getChipLevel(i_membChip->GetChipHandle()) )
        {
            // There is a bug in DD1.x where the value of MBSEVR cannot be
            // trusted. The workaround is too complicated for its value so
            // callout the rank instead.
            MemoryMru memmru ( mbaTrgt, rank, MemoryMruData::CALLOUT_RANK );
            i_sc.service_data->SetCallout( memmru );
        }
        else // DD2.0+
        {
            // Get the failing symbol
            const char * reg_str = (0 == i_mbaPos) ? "MBA0_MBSEVR"
                                                   : "MBA1_MBSEVR";
            SCAN_COMM_REGISTER_CLASS * reg = i_membChip->getRegister(reg_str);
            l_rc = reg->Read();
            if ( SUCCESS != l_rc )
            {
                PRDF_ERR( PRDF_FUNC"Read() failed on %s", reg_str );
                break;
            }

            uint8_t galois = reg->GetBitFieldJustified( 40, 8 );
            uint8_t mask   = reg->GetBitFieldJustified( 32, 8 );

            CenSymbol symbol = CenSymbol::fromGalois( mbaTrgt, rank, galois,
                                                      mask );
            if ( !symbol.isValid() )
            {
                PRDF_ERR( PRDF_FUNC"Failed to create symbol: galois=0x%02x "
                          "mask=0x%02x", galois, mask );
                break;
            }

            // Check if this symbol is on any of the spares.
            CenSymbol sp0, sp1, ecc;
            l_rc = mssGetSteerMux( mbaTrgt, rank, sp0, sp1, ecc );
            if ( SUCCESS != l_rc )
            {
                PRDF_ERR( PRDF_FUNC"mssGetSteerMux() failed. HUID: 0x%08x "
                        "rank: %d", getHuid(mbaTrgt), rank.getMaster() );
                break;
            }
            if ( (sp0.isValid() && (sp0.getDram() == symbol.getDram())) ||
                 (sp1.isValid() && (sp1.getDram() == symbol.getDram())) )
            {
                symbol.setDramSpared();
            }
            if ( ecc.isValid() && (ecc.getDram() == symbol.getDram()) )
            {
                symbol.setEccSpared();
            }

            // Add the DIMM to the callout list
            MemoryMru memmru ( mbaTrgt, rank, symbol );
            i_sc.service_data->SetCallout( memmru, MRU_MEDA );

            // Add to CE table
            CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip );
            uint32_t ceTableRc = mbadb->iv_ceTable.addEntry( addr, symbol );
            bool doTps = ( CenMbaCeTable::NO_TH_REACHED != ceTableRc );

            // Check MNFG thresholds, if needed.
            if ( mfgMode() )
            {
                // Get the MNFG CE thresholds.
                uint16_t dramTh, hrTh, dimmTh;
                l_rc = getMnfgMemCeTh( mbaChip, rank, dramTh, hrTh, dimmTh );
                if ( SUCCESS != l_rc )
                {
                    PRDF_ERR( PRDF_FUNC"getMnfgMemCeTh() failed: rank=m%ds%d",
                              rank.getMaster(), rank.getSlave() );
                    break;
                }

                // Get counts from CE table.
                uint32_t dramCount, hrCount, dimmCount;
                mbadb->iv_ceTable.getMnfgCounts( addr.getRank(), symbol,
                                                 dramCount, hrCount,
                                                 dimmCount );

                if ( dramTh < dramCount )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgDramCte );
                    i_sc.service_data->SetServiceCall();
                    doTps = true;
                }
                else if ( hrTh < hrCount )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgHrCte );
                    i_sc.service_data->SetServiceCall();
                    doTps = true;
                }
                else if ( dimmTh < dimmCount )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgDimmCte );
                    i_sc.service_data->SetServiceCall();
                    doTps = true;
                }
                else if ( 0 != (CenMbaCeTable::TABLE_FULL & ceTableRc) )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgTableFull);

                    // The table is full and no other threshold has been met.
                    // We are in a state where we may never hit a MNFG
                    // threshold. Callout all memory behind the MBA. Also, since
                    // the counts are all over the place, there may be a problem
                    // with the MBA. So call it out as well.
                    MemoryMru all_mm ( mbaTrgt, rank,
                                       MemoryMruData::CALLOUT_ALL_MEM );
                    i_sc.service_data->SetCallout( all_mm,  MRU_MEDA );
                    i_sc.service_data->SetCallout( mbaTrgt, MRU_MEDA );
                    i_sc.service_data->SetServiceCall();
                }
                else if ( 0 != (CenMbaCeTable::ENTRY_TH_REACHED & ceTableRc) )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgEntryCte );

                    // There is a single entry threshold and no other threshold
                    // has been met. This is a potential flooding issue, so make
                    // the DIMM callout predictive.
                    i_sc.service_data->SetServiceCall();
                }
            }

            // Initiate a TPS procedure, if needed.
            if ( doTps )
            {
                // If a MNFG threshold has been reached (predictive callout), we
                // will still try to start TPS just in case MNFG disables the
                // termination policy.

                // Will not be able to do TPS during hostboot. Note that we will
                // still call handleTdEvent() so we can get the trace statement
                // indicating TPS was requested during Hostboot.

                l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank,
                                                CenMbaTdCtlrCommon::TPS_EVENT );
                if ( SUCCESS != l_rc )
                {
                    PRDF_ERR( PRDF_FUNC"handleTdEvent() failed: rank=m%ds%d",
                              rank.getMaster(), rank.getSlave() );
                    break;
                }
            }
        }

    } while (0);

    // Add ECC capture data for FFDC.
    if ( NULL != mbaChip )
        CenMbaCaptureData::addMemEccData( mbaChip, i_sc );

    if ( SUCCESS != l_rc )
    {
        PRDF_ERR( PRDF_FUNC"Failed: i_membChip=0x%08x i_mbaPos=%d",
                  i_membChip->GetId(), i_mbaPos );
        CalloutUtil::defaultError( i_sc );
    }

    return SUCCESS; // Intentionally return SUCCESS for this plugin

    #undef PRDF_FUNC
}
Beispiel #13
0
/**
 * @brief  MBSECCFIR[0:7] - Fetch Mark Placed Event (MPE).
 * @param  i_membChip A Centaur chip.
 * @param  i_sc       The step code data struct.
 * @param  i_mbaPos   The MBA position.
 * @param  i_rank     The target rank.
 * @return SUCCESS
 */
int32_t AnalyzeFetchMpe( ExtensibleChip * i_membChip,
                         STEP_CODE_DATA_STRUCT & i_sc,
                         uint32_t i_mbaPos, uint8_t i_rank )
{
    #define PRDF_FUNC "[AnalyzeFetchMpe] "

    int32_t l_rc = SUCCESS;

    ExtensibleChip * mbaChip = NULL;

    do
    {
        CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip );
        mbaChip = membdb->getMbaChip( i_mbaPos );
        if ( NULL == mbaChip )
        {
            PRDF_ERR( PRDF_FUNC"getMbaChip() returned NULL" );
            l_rc = FAIL; break;
        }

        CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip );
        TargetHandle_t mbaTrgt = mbaChip->GetChipHandle();

        CenAddr addr;
        l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_MPE_ADDR, addr );
        if ( SUCCESS != l_rc )
        {
           PRDF_ERR( PRDF_FUNC"getCenReadAddr() failed" );
           break;
        }

        // If the address does not match the rank that reported the attention,
        // there are multiple MPE attentions and the address was overwritten.
        // In this case, add an invalid dummy address to the UE table.
        if ( addr.getRank().getMaster() != i_rank )
        {
            addr = CenAddr( i_rank, 0, 0xffffffff, 0xffffffff, 0xffffffff );
        }

        mbadb->iv_ueTable.addEntry( UE_TABLE::FETCH_MPE, addr );

        // Get the current mark in hardware.
        CenRank rank ( addr.getRank() );
        CenMark mark;
        l_rc = mssGetMarkStore( mbaTrgt, rank, mark );
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC"mssGetMarkStore() failed");
            break;
        }

        if ( !mark.getCM().isValid() )
        {
            PRDF_ERR( PRDF_FUNC"FIR bit set but no valid chip mark" );
            l_rc = FAIL; break;
        }

        // Callout the mark.
        CalloutUtil::calloutMark( mbaTrgt, rank, mark, i_sc );

        // Tell TD controller to handle VCM event.
        l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank,
                                               CenMbaTdCtlrCommon::VCM_EVENT );
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC"handleTdEvent() failed." );
            break;
        }

    } while (0);

    // Add ECC capture data for FFDC.
    if ( NULL != mbaChip )
        CenMbaCaptureData::addMemEccData( mbaChip, i_sc );

    if ( SUCCESS != l_rc )
    {
        PRDF_ERR( PRDF_FUNC"Failed: i_membChip=0x%08x i_mbaPos=%d i_rank=%d",
                  i_membChip->GetId(), i_mbaPos, i_rank );
        CalloutUtil::defaultError( i_sc );
    }

    return SUCCESS; // Intentionally return SUCCESS for this plugin

    #undef PRDF_FUNC
}
Beispiel #14
0
/**
 * @brief Analysis code that is called before the main analyze() function.
 * @param i_mbChip A MEMBUF chip.
 * @param i_sc Step Code Data structure
 * @param o_analyzed TRUE if analysis has been done on this chip
 * @return failure or success
 */
int32_t PreAnalysis( ExtensibleChip * i_mbChip, STEP_CODE_DATA_STRUCT & i_sc,
                     bool & o_analyzed )
{
    #define PRDF_FUNC "[Membuf::PreAnalysis] "

    int32_t o_rc = SUCCESS;

    o_analyzed = false;

    // Get memory capture data.
    CaptureData & cd = i_sc.service_data->GetCaptureData();
    CenMembufDataBundle * mbdb = getMembufDataBundle( i_mbChip );
    ExtensibleChip * mcsChip = mbdb->getMcsChip();
    if ( NULL != mcsChip )
    {
        mcsChip->CaptureErrorData( cd, Util::hashString("FirRegs") );
        mcsChip->CaptureErrorData( cd, Util::hashString("CerrRegs") );

        CenMbaCaptureData::addMemChipletFirRegs( i_mbChip, cd );
    }

    // Check for a Centaur Checkstop
    do
    {
        // Skip if we're already analyzing a unit checkstop
        if ( i_sc.service_data->GetFlag(ServiceDataCollector::UNIT_CS) )
            break;

        // Skip if we're analyzing a special attention.
        // This is a required for a rare scenario when Centaur CS bit comes
        // up after attention has called PRD and PRD was still at start of
        // analysis.
        if ( SPECIAL == i_sc.service_data->GetAttentionType() )
            break;

        // MCIFIR[31] is not always reliable if the unit CS originated on the
        // Centaur. This is due to packets not getting forwarded to the MCS.
        // Instead, check for non-zero GLOBAL_CS_FIR.

        SCAN_COMM_REGISTER_CLASS * fir = i_mbChip->getRegister("GLOBAL_CS_FIR");
        o_rc = fir->Read();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC"Failed to read GLOBAL_CS_FIR on 0x%08x",
                      i_mbChip->GetId() );
            break;
        }

        if ( fir->BitStringIsZero() ) break; // No unit checkstop

        // Set Unit checkstop flag
        i_sc.service_data->SetFlag(ServiceDataCollector::UNIT_CS);
        i_sc.service_data->SetThresholdMaskId(0);

        // Set the cause attention type
        i_sc.service_data->SetCauseAttentionType(UNIT_CS);

    } while (0);

    return o_rc;

    #undef PRDF_FUNC
}
Beispiel #15
0
int32_t cleanupSecondaryFirBits( ExtensibleChip * i_chip,
                       TYPE i_busType,
                       uint32_t i_busPos )
{
    int32_t l_rc = SUCCESS;
    TargetHandle_t mcsTgt = NULL;
    TargetHandle_t mbTgt = NULL;
    ExtensibleChip * mcsChip = NULL;
    ExtensibleChip * mbChip = NULL;

    //In case of spare deployed attention for DMI bus, we need to clear
    // secondary MBIFIR[10] and MCIFIR[10] bits.
    do
    {
        if ( i_busType == TYPE_MCS )
        {
            mcsTgt = getConnectedChild( i_chip->GetChipHandle(),
                                        TYPE_MCS,
                                        i_busPos);
            if (!mcsTgt) break;
            mcsChip = ( ExtensibleChip * )systemPtr->GetChip( mcsTgt );
            if (!mcsChip) break;
            mbChip =  getMcsDataBundle( mcsChip )->getMembChip();
            if (!mbChip) break;
            mbTgt =   mbChip->GetChipHandle();
            if (!mbTgt) break;
        }
        else if ( i_busType == TYPE_MEMBUF )
        {
            mbTgt = i_chip->GetChipHandle();
            if (!mbTgt) break;
            mcsChip = getMembufDataBundle( i_chip )->getMcsChip();
            if (!mcsChip) break;
            mcsTgt  = mcsChip->GetChipHandle();
            if (!mcsTgt) break;
            mbChip = i_chip;
        }
        else
        {
            // We only need to clean secondary FIR bits for DMI bus
            l_rc = SUCCESS;
            break;
        }

        SCAN_COMM_REGISTER_CLASS * mciAnd = mcsChip->getRegister("MCIFIR_AND");
        SCAN_COMM_REGISTER_CLASS * mbiAnd = mbChip->getRegister( "MBIFIR_AND");

        mciAnd->setAllBits(); mciAnd->ClearBit(10);
        mbiAnd->setAllBits(); mbiAnd->ClearBit(10);

        l_rc  = mciAnd->Write();
        l_rc |= mbiAnd->Write();

        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( "[cleanupSecondaryFirBits] Write() failed on "
                      "MCIFIR/MBIFIR: MCS=0x%08x MEMB=0x%08x",
                      mcsChip->GetId(), mbChip->GetId() );
            break;
        }

    } while (0);

    return l_rc;
}