コード例 #1
0
/**
 * @brief  Handles an MCE event reported while the TD state machine is in
 *         DSD phase 2.
 *
 * Marks the error log for a service call with the PRDFSIG_DsdBadSpare
 * signature, calls out the DRAM referenced by the current chip mark, and
 * records the bad spare (ECC spare or DRAM spare, depending on
 * iv_isEccSteer) in the bad DQ bitmap that is written back to VPD.
 *
 * @param  io_sc The step code data struct for this analysis.
 * @return SUCCESS if everything completed, otherwise the non-SUCCESS code
 *         from the failing step (FAIL if the state machine is not in
 *         DSD_PHASE_2).
 */
int32_t CenMbaTdCtlrCommon::handleMCE_DSD2( STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[CenMbaTdCtlrCommon::handleMCE_DSD2] "

    // This handler is only valid during DSD phase 2.
    if ( iv_tdState != DSD_PHASE_2 )
    {
        PRDF_ERR( PRDF_FUNC "Invalid state machine configuration" );
        return FAIL;
    }

    setTdSignature( io_sc, PRDFSIG_DsdBadSpare );
    io_sc.service_data->SetServiceCall();

    // Add the spare DRAM to the callout list.
    MemoryMru spareMru ( iv_mbaTrgt, iv_rank, iv_mark.getCM() );
    io_sc.service_data->SetCallout( spareMru );

    // The spare is bad, so it must be recorded in VPD. The chip mark itself
    // is expected to be in VPD already since it was only recently verified.
    CenDqBitmap dqBitmap;
    int32_t l_rc = getBadDqBitmap( iv_mbaTrgt, iv_rank, dqBitmap );
    if ( l_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed" );
        return l_rc;
    }

    if ( !iv_isEccSteer )
    {
        // DRAM spare on the port select of the chip mark.
        l_rc = dqBitmap.setDramSpare( iv_mark.getCM().getPortSlct() );
        if ( l_rc != SUCCESS )
        {
            PRDF_ERR( PRDF_FUNC "setDramSpare() failed" );
            return l_rc;
        }
    }
    else
    {
        dqBitmap.setEccSpare();
    }

    // Commit the updated bitmap.
    l_rc = setBadDqBitmap( iv_mbaTrgt, iv_rank, dqBitmap );
    if ( l_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "setBadDqBitmap() failed" );
    }

    return l_rc;

    #undef PRDF_FUNC
}
コード例 #2
0
ファイル: prdfCenMbaTdCtlr.C プロジェクト: rjknight/hostboot
/**
 * @brief  Handles a maintenance UE: aborts the TD procedure, flags the error
 *         log for a service call, and calls out the DIMMs with failing bits.
 *
 * The UE isolation procedure supplies a bad DQ bitmap for the rank. Every
 * port select that reports bad DQs has its connected DIMMs called out at
 * MRU_HIGH. If no port select reports bad DQs, all DIMMs connected to the
 * rank are called out instead. When MFG CE checking is enabled, CE analysis
 * is banned for whatever was called out.
 *
 * @param  io_sc The step code data struct for this analysis.
 * @return SUCCESS if everything completed, otherwise the non-SUCCESS code
 *         from the failing step.
 */
int32_t CenMbaTdCtlr::handleUE( STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[CenMbaTdCtlr::handleUE] "

    using namespace CalloutUtil;

    // The TD procedure is aborted regardless of what happens below.
    iv_tdState = NO_OP;

    setTdSignature( io_sc, PRDFSIG_MaintUE );
    io_sc.service_data->SetServiceCall();

    CenMbaDataBundle * dataBundle = getMbaDataBundle( iv_mbaChip );

    // The previous maintenance command is cleaned up in case the UE isolation
    // procedure is ever changed to issue maintenance commands of its own.
    int32_t l_rc = cleanupPrevCmd();
    if ( l_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "cleanupPrevCmd() failed" );
        return l_rc;
    }

    // Collect every failing bit on this rank.
    CenDqBitmap dqBitmap;
    l_rc = mssIplUeIsolation( iv_mbaTrgt, iv_rank, dqBitmap );
    if ( l_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "mssIplUeIsolation() failed" );
        return l_rc;
    }

    // The UE data goes into the capture data of the error log.
    dqBitmap.getCaptureData( io_sc.service_data->GetCaptureData() );

    // Build the list of DIMMs to call out, one port select at a time.
    TargetHandleList dimmList;
    for ( int32_t port = 0; port < PORT_SLCT_PER_MBA; ++port )
    {
        bool portHasBadDqs = false;
        l_rc = dqBitmap.badDqs( port, portHasBadDqs );
        if ( l_rc != SUCCESS )
        {
            PRDF_ERR( PRDF_FUNC "badDqs(%d) failed", port );
            return l_rc;
        }

        if ( portHasBadDqs )
        {
            TargetHandleList portDimms =
                            getConnectedDimms( iv_mbaTrgt, iv_rank, port );
            if ( portDimms.empty() )
            {
                PRDF_ERR( PRDF_FUNC "getConnectedDimms(%d) failed", port );
                return FAIL;
            }

            dimmList.insert( dimmList.end(),
                             portDimms.begin(), portDimms.end() );

            if ( isMfgCeCheckingEnabled() )
            {
                // These DIMMs are already being called out for the UE, so a
                // CE callout for this rank on this port is not needed.
                dataBundle->getIplCeStats()->banAnalysis( iv_rank, port );
            }
        }
    }

    if ( dimmList.empty() )
    {
        // The scrub counters may have rolled over to zero because of a known
        // DD1.0 hardware bug. A UE definitely occurred, but its location is
        // unknown, so the best option is to call out both DIMMs.
        // NOTE: If a DD2.0+ bug causes this condition, the mssIplUeIsolation
        //       procedure will callout the Centaur.
        dimmList = getConnectedDimms( iv_mbaTrgt, iv_rank );
        if ( dimmList.empty() )
        {
            PRDF_ERR( PRDF_FUNC "getConnectedDimms() failed" );
            return FAIL;
        }

        if ( isMfgCeCheckingEnabled() )
        {
            // Both ports are being called out for the UE, so a CE callout
            // for this rank is not needed on either port.
            dataBundle->getIplCeStats()->banAnalysis( iv_rank );
        }
    }

    // Every DIMM gathered above is called out at high priority.
    for ( TargetHandleList::iterator dimmIt = dimmList.begin();
          dimmList.end() != dimmIt; ++dimmIt )
    {
        io_sc.service_data->SetCallout( *dimmIt, MRU_HIGH );
    }

    return l_rc;

    #undef PRDF_FUNC
}
コード例 #3
0
/**
 * @brief  Handles an MCE event reported while the TD state machine is in
 *         VCM phase 2 (the chip mark is considered verified).
 *
 * Sequence:
 *  - Sets the PRDFSIG_VcmVerified signature. If DRAM repairs are disabled,
 *    the procedure ends here with a service call.
 *  - Removes a symbol mark that sits on the same DRAM as the chip mark.
 *  - Records the chip mark in the bad DQ bitmap and writes it to VPD.
 *  - If a spare (DRAM or ECC) is usable, starts DSD phase 1; otherwise the
 *    TD procedure is complete and iv_tdState is set to NO_OP. A service call
 *    is requested when the chip mark is on a spare, when no spare can be
 *    used, or when sparing is unsupported and a symbol mark is also present.
 *
 * iv_isEccSteer is reset to false on entry and set to true only when the ECC
 * spare is selected for steering.
 *
 * @param  io_sc The step code data struct for this analysis.
 * @return SUCCESS if everything completed, otherwise the non-SUCCESS code
 *         from the failing step (FAIL if the state machine is not in
 *         VCM_PHASE_2).
 */
int32_t CenMbaTdCtlrCommon::handleMCE_VCM2( STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[CenMbaTdCtlrCommon::handleMCE_VCM2] "

    using namespace fapi; // For spare config macros.

    int32_t o_rc = SUCCESS;

    iv_isEccSteer = false;

    do
    {
        if ( VCM_PHASE_2 != iv_tdState )
        {
            PRDF_ERR( PRDF_FUNC "Invalid state machine configuration" );
            o_rc = FAIL; break;
        }

        setTdSignature( io_sc, PRDFSIG_VcmVerified );

        if ( areDramRepairsDisabled() )
        {
            iv_tdState = NO_OP; // The TD procedure is complete.

            io_sc.service_data->SetServiceCall();

            break; // nothing else to do.
        }

        // If there is a symbol mark on the same DRAM as the newly verified chip
        // mark, remove the symbol mark.
        const uint8_t cmDram = iv_mark.getCM().getDram();
        if ( cmDram == iv_mark.getSM().getDram() )
        {
            iv_mark.clearSM();

            // Won't be blocked because chip mark is in place. Initialized so
            // the callee never observes an indeterminate value.
            bool blocked = false;
            o_rc = mssSetMarkStore( iv_mbaTrgt, iv_rank, iv_mark, blocked );
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "mssSetMarkStore() failed" );
                break;
            }
        }

        bool startDsdProcedure = false;

        // Read VPD.
        CenDqBitmap bitmap;
        o_rc = getBadDqBitmap( iv_mbaTrgt, iv_rank, bitmap );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed" );
            break;
        }

        // The chip mark is considered verified, so set it in VPD.
        o_rc = bitmap.setDram( iv_mark.getCM() );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "setDram() failed" );
            break;
        }

        uint8_t ps = iv_mark.getCM().getPortSlct();
        uint8_t spareConfig = ENUM_ATTR_VPD_DIMM_SPARE_NO_SPARE;
        o_rc = getDimmSpareConfig( iv_mbaTrgt, iv_rank, ps,
                                   spareConfig );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "getDimmSpareConfig() failed" );
            break;
        }

        // Check if DRAM spare is present. Also, ECC spares are available on all
        // x4 DIMMS.
        if ( ( ENUM_ATTR_VPD_DIMM_SPARE_NO_SPARE != spareConfig ) || iv_x4Dimm )
        {
            // Get the current spares in hardware.
            CenSymbol sp0, sp1, ecc;
            o_rc = mssGetSteerMux( iv_mbaTrgt, iv_rank, sp0, sp1, ecc );
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed" );
                break;
            }

            // If the verified chip mark is on a spare then the spare is bad and
            // hardware can not steer it to another DRAM even if one is
            // available (e.g. ECC spare). In this case, make the error log
            // predictive (remember that the chip mark has already been added
            // to the callout list).
            if ( ( cmDram == (0 == ps ? sp0.getDram() : sp1.getDram()) ) ||
                 ( cmDram == ecc.getDram() ) )
            {
                setTdSignature( io_sc, PRDFSIG_VcmBadSpare );
                io_sc.service_data->SetServiceCall();
            }
            else
            {
                // Certain DIMMs may have had spares intentionally made
                // unavailable by the manufacturer. Check the VPD for available
                // spares. Note that a x4 DIMM has DRAM spares and ECC spares,
                // so check for availability on both.
                bool dramSparePossible = false;
                bool eccSparePossible  = false;
                o_rc = bitmap.isSpareAvailable( ps, dramSparePossible,
                                                eccSparePossible );
                if ( SUCCESS != o_rc )
                {
                    // Trace names the function that was actually called.
                    PRDF_ERR( PRDF_FUNC "isSpareAvailable() failed" );
                    break;
                }

                if ( dramSparePossible &&
                     (0 == ps ? !sp0.isValid() : !sp1.isValid()) )
                {
                    // A spare DRAM is available.
                    startDsdProcedure = true;
                }
                else if ( eccSparePossible && !ecc.isValid() )
                {
                    // The ECC spare is available; steer to it instead.
                    startDsdProcedure = true;
                    iv_isEccSteer = true;
                }
                else
                {
                    // Chip mark is in place and sparing is not possible.
                    setTdSignature( io_sc, PRDFSIG_VcmCmAndSpare );
                    io_sc.service_data->SetServiceCall();
                }
            }
        }
        else // DRAM spare not supported.
        {
            // Not able to do dram sparing. If there is a symbol mark, there are
            // no repairs available so call it out and set the error log to
            // predictive.
            if ( iv_mark.getSM().isValid() )
            {
                setTdSignature( io_sc, PRDFSIG_VcmCmAndSm );
                io_sc.service_data->SetServiceCall();
            }
        }

        // Write VPD.
        o_rc = setBadDqBitmap( iv_mbaTrgt, iv_rank, bitmap );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "setBadDqBitmap() failed" );
            break;
        }

        // Start DSD Phase 1, if possible.
        if ( startDsdProcedure )
        {
            o_rc = startDsdPhase1( io_sc );
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "startDsdPhase1() failed" );
                break;
            }
        }
        else
        {
            iv_tdState = NO_OP; // The TD procedure is complete.
        }

    } while(0);

    return o_rc;

    #undef PRDF_FUNC
}
コード例 #4
0
/**
 * @brief Adds the DRAM repairs VPD (bad DQ bitmaps) of an MBA to the capture
 *        data.
 *
 * For every master rank whose bad DQ bitmap is non-zero, one record is
 * appended to a buffer: a single rank byte followed by the bitmap data for
 * all port selects. The buffer is padded to a whole number of CPU_WORDs,
 * each word is passed through htonl(), and the result is added to io_cd
 * under the "DRAM_REPAIRS_VPD" hash. Nothing is added if no rank has bad
 * DQs. Ranks whose bitmap cannot be read are skipped.
 *
 * @param i_mbaTrgt The target MBA.
 * @param io_cd     The capture data to add to.
 */
void captureDramRepairsVpd( TargetHandle_t i_mbaTrgt, CaptureData & io_cd )
{
    #define PRDF_FUNC "[captureDramRepairsVpd] "

    // Sizes of the pieces that make up one record in the capture data.
    static const size_t sz_rank  = sizeof(uint8_t);
    static const size_t sz_entry = PORT_SLCT_PER_MBA * DIMM_DQ_RANK_BITMAP_SIZE;
    static const size_t sz_word  = sizeof(CPU_WORD);
    int32_t rc = SUCCESS;

    do
    {
        std::vector<CenRank> masterRanks;
        rc = getMasterRanks( i_mbaTrgt, masterRanks );
        if ( SUCCESS != rc )
        {
            PRDF_ERR( PRDF_FUNC "getMasterRanks() failed" );
            break;
        }

        if( masterRanks.empty() )
        {
            PRDF_ERR( PRDF_FUNC "Master Rank list size is 0");
            break;
        }

        // Get the maximum capture data size.
        size_t sz_maxData = masterRanks.size() * (sz_rank + sz_entry);

        // Round up to a whole number of words so the buffer can be processed
        // one CPU_WORD at a time.
        sz_maxData = ((sz_maxData + sz_word-1) / sz_word) * sz_word;

        // Word-typed storage is used (instead of a runtime-sized byte array)
        // so the buffer is standard C++, correctly aligned for CPU_WORD
        // access, and zero-initialized. The list of master ranks is not empty
        // at this point, so the buffer holds at least one word.
        std::vector<CPU_WORD> capWords( sz_maxData / sz_word );
        uint8_t * capData = reinterpret_cast<uint8_t *>( &capWords[0] );

        // Iterate all ranks to get VPD data
        uint32_t idx = 0;
        for ( std::vector<CenRank>::iterator it = masterRanks.begin();
              it != masterRanks.end(); ++it )
        {
            CenDqBitmap bitmap;
            uint8_t rank = it->getMaster();

            if ( SUCCESS != getBadDqBitmap(i_mbaTrgt, *it, bitmap, true) )
            {
                PRDF_ERR( PRDF_FUNC "getBadDqBitmap() failed: MBA=0x%08x"
                          " rank=%d", getHuid(i_mbaTrgt), rank );
                continue; // skip this rank
            }

            if ( bitmap.badDqs() ) // make sure the data is non-zero
            {
                // Add the rank, then the entry data.
                capData[idx] = rank;              idx += sz_rank;
                memcpy(&capData[idx], bitmap.getData(), sz_entry);
                idx += sz_entry;
            }
        }

        if( 0 == idx ) break; // Nothing to capture

        // Fix endianness issues with non PPC machines. Only the words that
        // actually contain data are converted and captured.
        size_t sz_capData = idx;
        sz_capData = ((sz_capData + sz_word-1) / sz_word) * sz_word;
        for ( size_t i = 0; i < (sz_capData/sz_word); i++ )
            capWords[i] = htonl( capWords[i] );

        // Add data to capture data.
        BIT_STRING_ADDRESS_CLASS bs ( 0, sz_capData*8, &capWords[0] );
        io_cd.Add( i_mbaTrgt, Util::hashString("DRAM_REPAIRS_VPD"), bs );

    }while(0);

    if( FAIL == rc )
        PRDF_ERR( PRDF_FUNC "Failed for MBA 0x%08X", getHuid( i_mbaTrgt ) );

    #undef PRDF_FUNC
}