Ejemplo n.º 1
0
/**
 * @fn MaskMbsSecondaryBits
 * @brief Mask MBS secondary FIR bits which may come up because of L4 UE.
 *
 * Sets bit 27 in MBSFIR_MASK_OR and writes the register. A failed write is
 * only traced; it is not reported back to the caller.
 *
 * @param  i_chip       The Centaur chip.
 * @param  i_sc         The step code data struct (not used here).
 * @return SUCCESS always.
 */
int32_t MaskMbsSecondaryBits( ExtensibleChip * i_chip,
                              STEP_CODE_DATA_STRUCT & i_sc  )
{
    #define PRDF_FUNC "[MaskMbsSecondaryBits] "

    SCAN_COMM_REGISTER_CLASS * mbsFirMaskOr =
                                    i_chip->getRegister("MBSFIR_MASK_OR");
    mbsFirMaskOr->SetBit(27);

    if ( SUCCESS != mbsFirMaskOr->Write() )
    {
        // The two literals are concatenated by the compiler, so the first one
        // must end with a space to keep "failed" and "for" apart.
        PRDF_ERR( PRDF_FUNC "MBSFIR_MASK_OR write failed "
                  "for 0x%08x", i_chip->GetId() );
    }

    // The write result is intentionally not propagated.
    return SUCCESS;
    #undef PRDF_FUNC

} PRDF_PLUGIN_DEFINE( Membuf, MaskMbsSecondaryBits );
Ejemplo n.º 2
0
/**
 * @brief   Captures trapped address for L4 cache ECC errors.
 * @param   i_mbChip Centaur chip
 * @param   i_sc     Step code data struct
 * @returns SUCCESS always (a failed MBCELOG write is only traced)
 * @note    This function also resets the ECC trapped address register
 *          (MBCELOG) so that HW can capture the address for the next L4 ECC
 *          error.
 */
int32_t CaptureL4CacheErr( ExtensibleChip * i_mbChip,
                           STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[CaptureL4CacheErr] "

    // Add the "L4CacheErr" capture group to this analysis' capture data.
    i_mbChip->CaptureErrorData( i_sc.service_data->GetCaptureData(),
                                Util::hashString( "L4CacheErr" ) );

    // NOTE: FW should write on MBCELOG so that HW can capture
    // address for next L4 CE error.
    SCAN_COMM_REGISTER_CLASS * mbcelogReg = i_mbChip->getRegister("MBCELOG");
    mbcelogReg->clearAllBits();

    if ( SUCCESS != mbcelogReg->Write() )
    {
        PRDF_ERR( PRDF_FUNC "MBCELOG write failed for 0x%08x",
                  i_mbChip->GetId() );
    }

    return SUCCESS;

    // Keep the trace prefix macro local to this function.
    #undef PRDF_FUNC
}
Ejemplo n.º 3
0
/**
 * @brief  Cleans up and releases the current maintenance command (if any) and
 *         clears the command complete attention bits via MBASPA_AND.
 * @return SUCCESS if everything worked; the cleanupCmd() return code if only
 *         the command cleanup failed; FAIL if the MBASPA_AND write failed.
 */
int32_t CenMbaTdCtlrCommon::cleanupPrevCmd()
{
    #define PRDF_FUNC "[CenMbaTdCtlrCommon::cleanupPrevCmd] "

    int32_t o_rc = SUCCESS;

    // A maintenance command that will no longer be executed must always be
    // cleaned up and freed.
    if ( iv_mssCmd != NULL )
    {
        o_rc = iv_mssCmd->cleanupCmd();
        if ( o_rc != SUCCESS )
        {
            PRDF_ERR( PRDF_FUNC "cleanupCmd() failed" );
        }

        delete iv_mssCmd;
        iv_mssCmd = NULL;
    }

    // The command complete attention has to be cleared before the next
    // maintenance command is started. Start from all ones and clear only the
    // bits of interest in the AND register.
    SCAN_COMM_REGISTER_CLASS * spaAnd = iv_mbaChip->getRegister("MBASPA_AND");
    spaAnd->setAllBits();
    spaAnd->ClearBit(0); // Maintenance command complete
    spaAnd->ClearBit(8); // Maintenance command complete (DD1.0 workaround)

    const bool writeOk = ( SUCCESS == spaAnd->Write() );
    if ( !writeOk )
    {
        PRDF_ERR( PRDF_FUNC "Write() failed on MBASPA_AND" );
        o_rc = FAIL;
    }

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 4
0
/**
 * @fn MaskMbaCalSecondaryBits
 * @brief Mask MBACAL secondary FIR bits which may come up because of L4 UE.
 *
 * For every MBA chip the membuf data bundle returns, sets bits 9 and 15 in
 * MBACALFIR_MASK_OR and writes the register. A failed write is traced and the
 * remaining MBAs are still processed.
 *
 * @param  i_chip       The Centaur chip.
 * @param  i_sc         The step code data struct (not used here).
 * @return SUCCESS always.
 */
int32_t MaskMbaCalSecondaryBits( ExtensibleChip * i_chip,
                                 STEP_CODE_DATA_STRUCT & i_sc  )
{
    #define PRDF_FUNC "[MaskMbaCalSecondaryBits ] "

    CenMembufDataBundle * membdb = getMembufDataBundle( i_chip );

    for ( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ )
    {
        ExtensibleChip * mbaChip = membdb->getMbaChip(i);
        if ( NULL == mbaChip ) continue; // No chip at this MBA position.

        SCAN_COMM_REGISTER_CLASS * mbaCalFirMaskOr =
                            mbaChip->getRegister("MBACALFIR_MASK_OR");

        mbaCalFirMaskOr->SetBit(9);
        mbaCalFirMaskOr->SetBit(15);

        if ( SUCCESS != mbaCalFirMaskOr->Write() )
        {
            // Do not stop. Just print an error trace and move on to the
            // other MBA. The first literal ends with a space because the
            // compiler concatenates the two pieces.
            PRDF_ERR( PRDF_FUNC "MBACALFIR_MASK_OR write failed "
                      "for 0x%08x", mbaChip->GetId() );
        }
    }

    return SUCCESS;
    #undef PRDF_FUNC

} PRDF_PLUGIN_DEFINE( Membuf, MaskMbaCalSecondaryBits );
Ejemplo n.º 5
0
/**
 * @brief  Clears the calibration error bits (50 and 58) through the
 *         MBADDRPHYFIR_AND register of the MBA chip.
 * @return The return code of the register write (SUCCESS when it worked).
 */
int32_t CenMbaTdCtlrCommon::chipMarkCleanup()
{
    #define PRDF_FUNC "[CenMbaTdCtlrCommon::chipMarkCleanup] "

    SCAN_COMM_REGISTER_CLASS * phyFirAnd =
                            iv_mbaChip->getRegister( "MBADDRPHYFIR_AND" );

    // AND register: all ones except for the bits that should be cleared.
    phyFirAnd->setAllBits();
    phyFirAnd->ClearBit(50); // Calibration Error RE 0
    phyFirAnd->ClearBit(58); // Calibration Error RE 1

    const int32_t o_rc = phyFirAnd->Write();
    if ( o_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "Write() failed on MBADDRPHYFIR_AND" );
    }

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 6
0
/**
 * @brief  Checks MCIFIR[31] of an MCS for a channel fail and, if present,
 *         flags the analysis as a unit checkstop and requests channel fail
 *         cleanup on the connected membuf.
 * @param  i_mcsChip An MCS chip.
 * @param  io_sc     The step code data struct.
 * @return SUCCESS when no channel fail is present, when a unit checkstop is
 *         already being handled, or when the flags were set; non-SUCCESS if
 *         the chip is not an MCS, MCIFIR could not be read, or the membuf
 *         chip is not available.
 */
int32_t checkMcsChannelFail( ExtensibleChip * i_mcsChip,
                             STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[MemUtils::checkMcsChannelFail] "

    int32_t o_rc = SUCCESS;

    if ( io_sc.service_data->GetFlag(ServiceDataCollector::UNIT_CS) )
    {
        // A unit checkstop is already being handled, nothing more to do.
    }
    else if ( getTargetType(i_mcsChip->GetChipHandle()) != TYPE_MCS )
    {
        // Only MCS chips are supported.
        PRDF_ERR( PRDF_FUNC "i_mcsChip is not TYPE_MCS" );
        o_rc = FAIL;
    }
    else
    {
        // MCIFIR[31] indicates the presence of a channel fail.
        SCAN_COMM_REGISTER_CLASS * fir = i_mcsChip->getRegister("MCIFIR");
        o_rc = fir->Read();

        if ( o_rc != SUCCESS )
        {
            PRDF_ERR( PRDF_FUNC "Read() failed on MCIFIR" );
        }
        else if ( fir->IsBitSet(31) )
        {
            // Channel fail: mark the unit checkstop and the secondary
            // attention type, and reset the threshold mask ID.
            io_sc.service_data->SetFlag(ServiceDataCollector::UNIT_CS);
            io_sc.service_data->setSecondaryAttnType(UNIT_CS);
            io_sc.service_data->SetThresholdMaskId(0);

            // Remember on the membuf that cleanup is still required.
            P8McsDataBundle * mcsBundle = getMcsDataBundle( i_mcsChip );
            ExtensibleChip * membChip = mcsBundle->getMembChip();
            if ( membChip != NULL )
            {
                CenMembufDataBundle * membBundle =
                                        getMembufDataBundle( membChip );
                membBundle->iv_doChnlFailCleanup = true;
            }
            else
            {
                PRDF_ERR( PRDF_FUNC "getMembChip() returned NULL" );
                o_rc = FAIL;
            }
        }
    }

    if ( o_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "Failed: i_mcsChip=0x%08x", i_mcsChip->GetId() );
    }

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 7
0
/**
 * @brief Reads the given register and, if the read succeeds, adds its bit
 *        string to the capture data.
 * @param i_trgt   Target the register belongs to.
 * @param i_scomId ID under which the register data is stored.
 * @param io_scr   Register to read and capture.
 * @param i_place  Forwarded unchanged to AddDataElement().
 * @param i_type   Forwarded unchanged to AddDataElement().
 */
void CaptureData::Add( TargetHandle_t i_trgt, int32_t i_scomId,
                       SCAN_COMM_REGISTER_CLASS & io_scr,
                       Place i_place, RegType i_type )
{
    // Nothing is captured when the register cannot be read.
    if ( SUCCESS != io_scr.Read() ) return;

    AddDataElement( i_trgt, i_scomId, io_scr.GetBitString(),
                    i_place, i_type );
}
Ejemplo n.º 8
0
/**
 * @brief  Determines which PCIE oscillator position is currently active.
 * @param  i_chip The chip whose PCIE_OSC_SWITCH register is queried.
 * @param  io_sc  The step code data struct (not used here).
 * @return 0 or 1 for the active oscillator position, or
 *         MAX_PCIE_OSC_PER_NODE if PCIE_OSC_SWITCH could not be read.
 * @note   When isHyprConfigOpal() is true, position 0 is returned without
 *         accessing the register (see the workaround below).
 */
uint32_t getIoOscPos( ExtensibleChip * i_chip,
                      STEP_CODE_DATA_STRUCT & io_sc)
{
    #define PRDF_FUNC "[PLL::getIoOscPos] "
    uint32_t o_oscPos = MAX_PCIE_OSC_PER_NODE;

    do
    {
        // START WORKAROUND
        // TODO: RTC 137711 - This redundant clock code only applies to Brazos
        //       systems. Unfortunately, this code made it into the common
        //       source and we ran into SW324506 where we are unable to SCOM
        //       PCIE_OSC_SWITCH during OP checkstop analysis. We should have
        //       a system attribute that tells us if redundant clock are enabled
        //       but for now just assume anything that is OPAL based will not
        //       have redundant clocks. Note that we still need this code in
        //       Hostboot (not HBRT) because Hostboot is still run on a Brazos
        //       system.
        if ( isHyprConfigOpal() )
        {
            o_oscPos = 0;
            break;
        }
        // END WORKAROUND

        SCAN_COMM_REGISTER_CLASS * pcieOscSwitchReg =
                i_chip->getRegister("PCIE_OSC_SWITCH");

        if ( SUCCESS != pcieOscSwitchReg->Read() )
        {
            // The first literal ends with a space because the compiler
            // concatenates the two pieces of the format string.
            PRDF_ERR(PRDF_FUNC "PCIE_OSC_SWITCH read failed "
                     "for 0x%08x", i_chip->GetId());
            break;
        }

        // [ 16 ] == 1    ( OSC 0 is active )
        // [ 16 ] == 0    ( OSC 1 is active )
        o_oscPos = pcieOscSwitchReg->IsBitSet(16) ? 0 : 1;

    } while(0);

    return o_oscPos;

    #undef PRDF_FUNC
}
Ejemplo n.º 9
0
/**
 * @brief Call to check for configured PHB (before capturing FFDC)
 * @param  i_chip             P8 chip
 * @param  i_phbPos           PHB position (valid values are 0-2)
 * @param  o_isPhbConfigured  Set to true if bit 0 of the PHB's
 *                            PCI_ETU_RESET_x register is clear. Left false
 *                            for an invalid position or a failed register
 *                            access.
 * @returns SUCCESS always; failures are only traced.
 */
int32_t phbConfigured(ExtensibleChip * i_chip,
                      uint32_t i_phbPos,
                      bool & o_isPhbConfigured)
{
    #define PRDF_FUNC "[Proc::phbConfigured] "

    static const uint32_t MAX_PCI_NUM = 3;
    static const char * pciEtuResetReg[MAX_PCI_NUM] =
                                         { "PCI_ETU_RESET_0",
                                           "PCI_ETU_RESET_1",
                                           "PCI_ETU_RESET_2" };
    o_isPhbConfigured = false;

    do
    {
        // Guard the table lookup below.
        if( i_phbPos >= MAX_PCI_NUM )
        {
            // i_phbPos is unsigned, so it is printed with %u.
            PRDF_ERR( PRDF_FUNC "invalid PCI number: %u", i_phbPos );
            break;
        }

        SCAN_COMM_REGISTER_CLASS * etuResetReg =
            i_chip->getRegister( pciEtuResetReg[i_phbPos] );

        if(NULL == etuResetReg)
        {
            PRDF_ERR( PRDF_FUNC "getRegister() Failed for register:%s",
                         pciEtuResetReg[i_phbPos] );
            break;
        }

        if ( SUCCESS != etuResetReg->Read() )
        {
            PRDF_ERR( PRDF_FUNC "%s Read() failed. Target=0x%08x",
                      pciEtuResetReg[i_phbPos], i_chip->GetId() );
            break;
        }

        // If bit 0 is cleared then the PHB is configured
        o_isPhbConfigured = !etuResetReg->IsBitSet(0);

    } while(0);

    return SUCCESS;

    #undef PRDF_FUNC
}
Ejemplo n.º 10
0
/**
 * @brief Call to check for configured PHB (before capturing FFDC)
 * @param  i_chip             P8 chip
 * @param  i_phbPos           PHB position (0-3 when the processor model is
 *                            MODEL_NAPLES, 0-2 otherwise)
 * @param  o_isPhbConfigured  Set to true if bit 0 of PCI_ETU_RESET_<pos> is
 *                            clear. Left false if the PHB does not exist or
 *                            the register could not be accessed.
 * @returns SUCCESS always; failures are only traced.
 */
int32_t phbConfigured( ExtensibleChip * i_chip, uint32_t i_phbPos,
                       bool & o_isPhbConfigured )
{
    #define PRDF_FUNC "[Proc::phbConfigured] "

    o_isPhbConfigured = false;

    uint32_t maxPhbs = 3; // Murano/Venice
    if ( MODEL_NAPLES == getProcModel(i_chip->GetChipHandle()) )
        maxPhbs = 4;

    do
    {
        if ( maxPhbs <= i_phbPos )
        {
            // This PHB doesn't exist, return false
            break;
        }

        // Build the register name for this PHB. i_phbPos is unsigned, so it
        // is formatted with %u, and the buffer size is taken from the buffer
        // itself so the two cannot get out of sync.
        char reg_str[64];
        snprintf( reg_str, sizeof(reg_str), "PCI_ETU_RESET_%u", i_phbPos );

        SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister( reg_str );
        if ( NULL == reg )
        {
            PRDF_ERR( PRDF_FUNC "getRegister() failed for %s", reg_str );
            break;
        }

        if ( SUCCESS != reg->Read() )
        {
            PRDF_ERR( PRDF_FUNC "Read() failed for %s: target=0x%08x",
                      reg_str, i_chip->GetId() );
            break;
        }

        // If bit 0 is cleared then the PHB is configured
        o_isPhbConfigured = !reg->IsBitSet(0);

    } while(0);

    return SUCCESS;

    #undef PRDF_FUNC
}
Ejemplo n.º 11
0
/**
 * @brief  Checks MCIFIR[31] of an MCS for a channel fail and, if present,
 *         flags the analysis as a unit checkstop.
 * @param  i_mcsChip An MCS chip.
 * @param  io_sc     The step code data struct.
 * @return SUCCESS when no channel fail is present, when a unit checkstop is
 *         already being handled, or when the flags were set; non-SUCCESS if
 *         the chip is not an MCS or MCIFIR could not be read.
 */
int32_t checkMcsChannelFail( ExtensibleChip * i_mcsChip,
                             STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[MemUtils::checkMcsChannelFail] "

    int32_t o_rc = SUCCESS;

    if ( io_sc.service_data->GetFlag(ServiceDataCollector::UNIT_CS) )
    {
        // Already handling a unit checkstop, so there is nothing to check.
    }
    else if ( getTargetType(i_mcsChip->GetChipHandle()) != TYPE_MCS )
    {
        // The chip handed in has to be an MCS.
        PRDF_ERR( PRDF_FUNC "i_mcsChip is not TYPE_MCS" );
        o_rc = FAIL;
    }
    else
    {
        // A channel fail is reported in MCIFIR[31].
        SCAN_COMM_REGISTER_CLASS * fir = i_mcsChip->getRegister("MCIFIR");
        o_rc = fir->Read();

        if ( o_rc != SUCCESS )
        {
            PRDF_ERR( PRDF_FUNC "Read() failed on MCIFIR" );
        }
        else if ( fir->IsBitSet(31) )
        {
            // Channel fail present: set the unit checkstop flag and the
            // cause attention type, and reset the threshold mask ID.
            io_sc.service_data->SetFlag(ServiceDataCollector::UNIT_CS);
            io_sc.service_data->SetCauseAttentionType(UNIT_CS);
            io_sc.service_data->SetThresholdMaskId(0);
        }
    }

    if ( o_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "Failed: i_mcsChip=0x%08x", i_mcsChip->GetId() );
    }

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 12
0
/**
 * @brief  Clears every register listed in mbsCeStatReg for the given MBA
 *         position and writes the cleared value to hardware.
 * @param  i_membChip The membuf chip that owns the registers.
 * @param  i_mbaPos   MBA position; must be less than MAX_MBA_PER_MEMBUF.
 * @return SUCCESS if all registers were written; FAIL for an invalid
 *         position; otherwise the return code of the first failing Write().
 *         Processing stops at the first failing register.
 */
int32_t clearPerSymbolCounters( ExtensibleChip * i_membChip, uint32_t i_mbaPos )
{
    #define PRDF_FUNC "[MemUtils::clearPerSymbolCounters] "

    int32_t o_rc = SUCCESS;

    if ( MAX_MBA_PER_MEMBUF <= i_mbaPos )
    {
        // i_mbaPos is unsigned, so it is printed with %u.
        PRDF_ERR( PRDF_FUNC "i_mbaPos %u is invalid", i_mbaPos );
        o_rc = FAIL;
    }
    else
    {
        for ( uint8_t regIdx = 0; regIdx < CE_REGS_PER_MBA; regIdx++ )
        {
            const char * reg_str = mbsCeStatReg[i_mbaPos][regIdx];
            SCAN_COMM_REGISTER_CLASS * reg =
                                    i_membChip->getRegister( reg_str );

            reg->clearAllBits();

            o_rc = reg->Write();
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "Write() failed on %s", reg_str );
                break; // Give up on the remaining registers.
            }
        }
    }

    if ( SUCCESS != o_rc )
    {
        PRDF_ERR( PRDF_FUNC "Failed. i_membChip=0x%08x i_mbaPos=%u",
                  i_membChip->GetId(), i_mbaPos );
    }

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 13
0
/**
 * @brief  Plugin to mask the side effects of an RCD parity error.
 *
 * Sets MBAFIR_MASK_OR[2] and MBACALFIR_MASK_OR[2,17] on the MBA and
 * MBSFIR_MASK_OR[4] on the connected membuf, then writes all three
 * registers. Failures are traced but never returned.
 *
 * @param  i_mbaChip A Centaur MBA chip.
 * @param  i_sc      The step code data struct.
 * @return SUCCESS always.
 */
int32_t maskRcdParitySideEffects( ExtensibleChip * i_mbaChip,
                                    STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[maskRcdParitySideEffects] "

    // The membuf chip is reached through the MBA's data bundle.
    CenMbaDataBundle * bundle = getMbaDataBundle( i_mbaChip );
    ExtensibleChip * membChip = bundle->getMembChip();

    if ( membChip != NULL )
    {
        // Mask (OR) registers of the three affected FIRs.
        SCAN_COMM_REGISTER_CLASS * mbsMask =
            membChip->getRegister("MBSFIR_MASK_OR");
        SCAN_COMM_REGISTER_CLASS * calMask =
            i_mbaChip->getRegister("MBACALFIR_MASK_OR");
        SCAN_COMM_REGISTER_CLASS * mbaMask =
            i_mbaChip->getRegister("MBAFIR_MASK_OR");

        mbaMask->SetBit(2);
        calMask->SetBit(2);
        calMask->SetBit(17);
        mbsMask->SetBit(4);

        // All three writes are always attempted; the return codes are
        // OR-ed together so any failure is noticed.
        int32_t writeRc = mbaMask->Write();
        writeRc |= calMask->Write();
        writeRc |= mbsMask->Write();

        if ( writeRc != SUCCESS )
        {
            PRDF_ERR(PRDF_FUNC "MBAFIR_MASK_OR/MBACALFIR_MASK_OR/MBSFIR_MASK_OR"
                    " write failed for 0x%08x", i_mbaChip->GetId());
        }
    }
    else
    {
        PRDF_ERR(PRDF_FUNC "getMembChip() failed");
    }

    return SUCCESS;
    #undef PRDF_FUNC
}
Ejemplo n.º 14
0
/**
 * @brief Handles MCS Channel fail bits, if they exist.
 *
 * @param  i_membChip   The Centaur chip.
 * @param  i_sc         The step code data struct.
 *
 * @return SUCCESS if an unmasked MCS channel fail bit is present and the MCS
 *         was analyzed successfully. Non-SUCCESS otherwise, which lets the
 *         rule code execute an alternate resolution.
 */
int32_t handleMcsChnlCs( ExtensibleChip * i_membChip,
                    STEP_CODE_DATA_STRUCT & i_sc  )
{
    #define PRDF_FUNC "[handleMcsChnlCs] "

    // Without a connected MCS there is nothing to analyze.
    CenMembufDataBundle * bundle = getMembufDataBundle( i_membChip );
    ExtensibleChip * mcsChip = bundle->getMcsChip();
    if ( mcsChip == NULL ) return FAIL;

    SCAN_COMM_REGISTER_CLASS * fir  = mcsChip->getRegister("MCIFIR");
    SCAN_COMM_REGISTER_CLASS * mask = mcsChip->getRegister("MCIFIR_MASK");

    // Both registers are always read; the return codes are OR-ed together.
    int32_t readRc = fir->Read();
    readRc |= mask->Read();
    if ( readRc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "MCIFIR/MCIFIR_MASK read failed for 0x%08x",
                  mcsChip->GetId());
        return readRc;
    }

    // The channel fail bits are hard wired and cannot change without a HW
    // change, so a hard coded mask is safe here.
    // Bits 0, 1, 6, 8, 9, 22, 23 and 40 are the channel fail bits.
    const uint64_t chnlCsBits = 0xC2C0030000800000ull;
    const uint64_t firBits    = fir->GetBitFieldJustified(0, 64);
    const uint64_t maskBits   = mask->GetBitFieldJustified(0, 64);

    // Only unmasked channel fail bits that are on count.
    const uint64_t activeChnlCs = firBits & ~maskBits & chnlCsBits;
    if ( 0 == activeChnlCs ) return FAIL;

    // A channel fail is present, so analyze the MCS. Any analysis failure is
    // reported as FAIL.
    const int32_t analyzeRc = mcsChip->Analyze( i_sc,
                        i_sc.service_data->GetCauseAttentionType() );

    return ( SUCCESS == analyzeRc ) ? SUCCESS : FAIL;
    #undef PRDF_FUNC

} PRDF_PLUGIN_DEFINE( Membuf, handleMcsChnlCs );
Ejemplo n.º 15
0
/**
 * @brief  Programs the CE thresholds and the per symbol counter mode in the
 *         MBSTR register of this controller's MBA (MBA0_MBSTR or MBA1_MBSTR).
 * @return SUCCESS if the register was updated; otherwise the return code of
 *         the failing ForceRead(), getScrubCeThreshold() or Write() call.
 */
int32_t CenMbaTdCtlrCommon::setRtEteThresholds()
{
    #define PRDF_FUNC "[CenMbaTdCtlrCommon::setRtEteThresholds] "

    const char * reg_str = "MBA1_MBSTR";
    if ( 0 == iv_mbaPos ) reg_str = "MBA0_MBSTR";

    SCAN_COMM_REGISTER_CLASS * thrReg = iv_membChip->getRegister( reg_str );

    // cleanupCmd() may have modified MBSTR, so the content is refreshed from
    // hardware before it is changed.
    int32_t o_rc = thrReg->ForceRead();
    if ( o_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "ForceRead() failed on %s", reg_str );
        return o_rc;
    }

    // One threshold is shared by the soft and intermittent CE fields.
    uint16_t softIntCe = 0;
    o_rc = getScrubCeThreshold( iv_mbaChip, iv_rank, softIntCe );
    if ( o_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "getScrubCeThreshold() failed." );
        return o_rc;
    }

    // Retry CEs only matter in the field when there are a lot of them, hence
    // the high threshold. In MNFG every occurrence is put in the RCE table
    // and targeted diagnostics are done when needed.
    uint16_t retryCe = 2047;
    if ( mfgMode() ) retryCe = 1;

    // Hard CEs: always stop on the first occurrence.
    uint16_t hardCe = 1;

    thrReg->SetBitFieldJustified(  4, 12, softIntCe );
    thrReg->SetBitFieldJustified( 16, 12, softIntCe );
    thrReg->SetBitFieldJustified( 28, 12, hardCe    );
    thrReg->SetBitFieldJustified( 40, 12, retryCe   );

    // Let the per symbol counters count hard CEs only. When the scrub stops
    // on the first hard CE, the counters then tell which symbol reported it.
    thrReg->SetBitFieldJustified( 55, 3, 0x1 );

    o_rc = thrReg->Write();
    if ( o_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "Write() failed on %s", reg_str );
    }

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 16
0
/**
 * @brief  Does the setup for mnfg IPL CE: selects which CE type the
 *         per-symbol error counters count (MBSTR bits 55-57) based on the
 *         current TD state.
 *
 * TPS_PHASE_1 enables counting of soft CEs and disables hard CEs.
 * TPS_PHASE_2 does the opposite. Any other state is rejected.
 *
 * @return SUCCESS if MBSTR was updated; FAIL for an unexpected TD state;
 *         otherwise the return code of the failing ForceRead() or Write().
 */
int32_t CenMbaTdCtlr::mnfgCeSetup()
{
    #define PRDF_FUNC "[CenMbaTdCtlr::mnfgCeSetup] "

    int32_t o_rc = SUCCESS;

    do
    {
        const char * reg_str = (0 == iv_mbaPos) ? "MBA0_MBSTR" : "MBA1_MBSTR";
        SCAN_COMM_REGISTER_CLASS * mbstr = iv_membChip->getRegister( reg_str );

        // MBSTR's content could be modified from cleanupCmd()
        // so we need to refresh
        o_rc = mbstr->ForceRead();
        if ( SUCCESS != o_rc )
        {
            // Name the call that actually failed.
            PRDF_ERR( PRDF_FUNC "ForceRead() failed on %s", reg_str );
            break;
        }

        if ( TPS_PHASE_1 == iv_tdState )
        {
            // Enable per-symbol error counters to count soft CEs
            mbstr->SetBit(55);
            mbstr->SetBit(56);
            // Disable per-symbol error counters to count hard CEs
            mbstr->ClearBit(57);
        }
        else if ( TPS_PHASE_2 == iv_tdState )
        {
            // Disable per-symbol error counters to count soft CEs
            mbstr->ClearBit(55);
            mbstr->ClearBit(56);
            // Enable per-symbol error counters to count hard CEs
            mbstr->SetBit(57);
        }
        else
        {
            PRDF_ERR( PRDF_FUNC "Invalid State:%u", iv_tdState );
            o_rc = FAIL; break;
        }

        o_rc = mbstr->Write();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "Write() failed on %s", reg_str );
            break;
        }

    } while(0);

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 17
0
/**
 * @brief  Reads the DRAM size field (bits 6-7) from the MBAXCR register of
 *         the given MBA (MBA0_MBAXCR or MBA1_MBAXCR on the connected membuf).
 * @param  i_mbaChip An MBA chip.
 * @param  o_size    The size field read from the register. Set to SIZE_2GB
 *                   if the value could not be obtained.
 * @return SUCCESS if the register was read; FAIL if the membuf chip is not
 *         available; otherwise the return code of the failing Read().
 */
int32_t getDramSize( ExtensibleChip *i_mbaChip, uint8_t & o_size )
{
    #define PRDF_FUNC "[MemUtils::getDramSize] "

    // Default in case anything below fails.
    o_size = SIZE_2GB;

    TargetHandle_t mbaTrgt = i_mbaChip->GetChipHandle();

    // The register lives on the membuf connected to this MBA.
    CenMbaDataBundle * bundle = getMbaDataBundle( i_mbaChip );
    ExtensibleChip * membufChip = bundle->getMembChip();
    if ( membufChip == NULL )
    {
        PRDF_ERR( PRDF_FUNC "getMembChip() failed: MBA=0x%08x",
                  getHuid(mbaTrgt) );
        return FAIL;
    }

    // Each MBA position has its own MBAXCR register.
    const char * reg_str = "MBA1_MBAXCR";
    if ( 0 == getTargetPosition(mbaTrgt) ) reg_str = "MBA0_MBAXCR";

    SCAN_COMM_REGISTER_CLASS * xcr = membufChip->getRegister( reg_str );
    const int32_t o_rc = xcr->Read();
    if ( o_rc != SUCCESS )
    {
        PRDF_ERR( PRDF_FUNC "Read() failed on %s. Target=0x%08x",
                  reg_str, getHuid(mbaTrgt) );
        return o_rc;
    }

    o_size = xcr->GetBitFieldJustified( 6, 2 );

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 18
0
/**
 * @brief  Tells whether the spare deployed bit is on for a DMI bus, on either
 *         the Centaur side (DMIFIR[9]) or the processor side (IOMCFIR_0/1).
 * @param  i_mcsChip The MCS chip on the bus (may be NULL).
 * @param  i_mbChip  The membuf chip on the bus (may be NULL).
 * @return true if a spare deployed bit is set on either side. false if no bit
 *         is set, if either chip is NULL, if the parent processor chip cannot
 *         be found, or if a register read fails.
 */
bool isSpareBitOnDMIBus( ExtensibleChip * i_mcsChip, ExtensibleChip * i_mbChip )
{
    bool bitOn = false;

    do
    {
        // If any of these object is NULL, spare bit should not be on.
        if ( ( NULL == i_mcsChip ) || ( NULL == i_mbChip ))
            break;

        // check spare deployed bit on Centaur side
        SCAN_COMM_REGISTER_CLASS * dmiFir = i_mbChip->getRegister( "DMIFIR" );
        int32_t rc = dmiFir->Read();
        if ( SUCCESS != rc )
        {
            PRDF_ERR("isSpareBitOnDMIBus() : Failed to read DMIFIR. "
                      "MEMBUF: 0x%08X", getHuid( i_mbChip->GetChipHandle()) );
            break;
        }
        if ( dmiFir->IsBitSet( 9 ))
        {
            bitOn = true;
            break;
        }

        // check spare deployed bit on Proc side
        TargetHandle_t mcsTgt = i_mcsChip->GetChipHandle();
        TargetHandle_t procTgt = getConnectedParent( mcsTgt, TYPE_PROC );
        if ( NULL == procTgt )
        {
            PRDF_ERR("isSpareBitOnDMIBus() : Failed to get parent PROC. "
                      "MCS: 0x%08X", getHuid(mcsTgt) );
            break;
        }

        // The chip lookup can come back empty; do not dereference it blindly.
        ExtensibleChip * procChip =
                        ( ExtensibleChip * )systemPtr->GetChip( procTgt );
        if ( NULL == procChip )
        {
            PRDF_ERR("isSpareBitOnDMIBus() : Failed to get PROC chip. "
                      "MCS: 0x%08X", getHuid(mcsTgt) );
            break;
        }

        uint32_t mcsPos = getTargetPosition( mcsTgt );

        // MCS positions 0-3 report in IOMCFIR_0, the others in IOMCFIR_1.
        const char * regStr = ( 4 > mcsPos) ? "IOMCFIR_0" : "IOMCFIR_1";
        SCAN_COMM_REGISTER_CLASS * iomcFir = procChip->getRegister( regStr );
        rc = iomcFir->Read();
        if ( SUCCESS != rc )
        {
            PRDF_ERR("isSpareBitOnDMIBus() : Failed to read %s. "
                      "MCS: 0x%08X", regStr, getHuid(mcsTgt) );
            break;
        }
        // Bit 9, 17, 25 and 33 are for spare deployed.
        // Check bit corresponding to MCS position
        uint8_t bitPos = 9 + ( mcsPos % 4 ) *8;
        if ( iomcFir->IsBitSet(bitPos))
        {
            bitOn = true;
        }

    }while(0);

    return bitOn;
}
Ejemplo n.º 19
0
/**
  * @brief Query the PLL chip for a PCI PLL error
  * @param i_chip P8 Pci chip
  * @param o_result Set to true when PLL_ERROR_BIT is on in PCI_ERROR_REG and
  *                 PLL_ERROR_MASK is off in PCI_CONFIG_REG. false otherwise,
  *                 including when a register read fails.
  * @returns SUCCESS if both registers were read; otherwise the return code of
  *          the failing Read().
  */
int32_t QueryPciPll( ExtensibleChip * i_chip,
                        bool & o_result)
{
    #define PRDF_FUNC "[Proc::QueryPciPll] "

    int32_t rc = SUCCESS;
    o_result = false;

    SCAN_COMM_REGISTER_CLASS * pciErrReg =
                i_chip->getRegister("PCI_ERROR_REG");
    SCAN_COMM_REGISTER_CLASS * pciConfigReg =
                i_chip->getRegister("PCI_CONFIG_REG");

    do
    {
        // Note on the traces below: adjacent literals are concatenated by
        // the compiler, so the first piece has to end with a space.
        rc = pciErrReg->Read();
        if (rc != SUCCESS)
        {
            PRDF_ERR(PRDF_FUNC "PCI_ERROR_REG read failed "
                     "for 0x%08x", i_chip->GetId());
            break;
        }

        rc = pciConfigReg->Read();
        if (rc != SUCCESS)
        {
            PRDF_ERR(PRDF_FUNC "PCI_CONFIG_REG read failed "
                     "for 0x%08x", i_chip->GetId());
            break;
        }

        // Only an unmasked error counts.
        o_result = pciErrReg->IsBitSet(PLL_ERROR_BIT) &&
                   !pciConfigReg->IsBitSet(PLL_ERROR_MASK);

    } while(0);

    if( rc != SUCCESS )
    {
        PRDF_ERR(PRDF_FUNC "failed for proc: 0x%.8X",
                 i_chip->GetId());
    }

    return rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 20
0
/**
  * @brief Mask the PLL error for P8 Plugin
  * @param  i_chip P8 chip
  * @param  i_sc   The step code data struct
  * @param  i_oscPos active osc position
  * @returns SUCCESS if the mask bit was set or no masking was required;
  *          FAIL for an invalid i_oscPos; otherwise the return code of the
  *          failing PCI_CONFIG_REG read or write.
  * @note   Nothing is masked when the primary attention type is CHECK_STOP
  *         or when getIoOscPos() for this chip differs from i_oscPos.
  */
int32_t MaskPllIo( ExtensibleChip * i_chip,
                 STEP_CODE_DATA_STRUCT & i_sc,
                 uint32_t i_oscPos )
{
    #define PRDF_FUNC "[Proc::MaskPllIo] "

    int32_t rc = SUCCESS;

    do
    {
        if (CHECK_STOP == i_sc.service_data->getPrimaryAttnType())
        {
            break;
        }

        if ( i_oscPos >= MAX_PCIE_OSC_PER_NODE )
        {
            // i_oscPos is unsigned, so it is printed with %u.
            PRDF_ERR(PRDF_FUNC "invalid oscPos: %u for chip: "
                     "0x%08x", i_oscPos, i_chip->GetId());
            rc = FAIL;
            break;
        }

        // Only mask on chips that use the oscillator in question.
        uint32_t oscPos = getIoOscPos( i_chip, i_sc );

        if ( oscPos != i_oscPos )
        {
            PRDF_DTRAC(PRDF_FUNC "skip masking for chip: 0x%08x, "
                      "oscPos: %u, i_oscPos: %u",
                      i_chip->GetId(), oscPos, i_oscPos);
            break;
        }

        // fence off pci osc error reg bit
        SCAN_COMM_REGISTER_CLASS * pciConfigReg =
            i_chip->getRegister("PCI_CONFIG_REG");

        // Note on the traces below: adjacent literals are concatenated by
        // the compiler, so the first piece has to end with a space.
        rc = pciConfigReg->Read();
        if (rc != SUCCESS)
        {
            PRDF_ERR(PRDF_FUNC "PCI_CONFIG_REG read failed "
                     "for 0x%08x", i_chip->GetId());
            break;
        }

        // Skip the write when the mask bit is already on.
        if(!pciConfigReg->IsBitSet(PLL_ERROR_MASK))
        {
            pciConfigReg->SetBit(PLL_ERROR_MASK);
            rc = pciConfigReg->Write();
            if (rc != SUCCESS)
            {
                PRDF_ERR(PRDF_FUNC "PCI_CONFIG_REG write failed "
                         "for chip: 0x%08x",
                         i_chip->GetId());
            }
        }

        // Since TP_LFIR bit is the collection of all of the
        // pll error reg bits, we can't mask it or we will not
        // see any PLL errors reported from the error regs

    } while(0);

    return rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 21
0
/**
 * @brief   Calls out the EX chiplet (MRU_LOW), if possible. Otherwise, calls
 *          out the PROC (MRU_LOW).
 *
 * The EX chiplet is determined from PB_CENT_CR_ERROR. Isolation is only
 * attempted when bits 0-2 are 0b010, bits 38-42 are 0b00000 and bit 22 is
 * clear. Bits 23-26 then give the EX position.
 *
 * @param   i_chip   P8 chip
 * @param   io_sc    The step code data struct
 * @returns SUCCESS always
 */
int32_t combinedResponseCallout( ExtensibleChip * i_chip,
                                 STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[Proc::combinedResponseCallout] "

    int32_t l_rc = SUCCESS;

    TargetHandle_t procTrgt = i_chip->GetChipHandle();

    SCAN_COMM_REGISTER_CLASS * reg = i_chip->getRegister("PB_CENT_CR_ERROR");

    do
    {
        l_rc = reg->Read();
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC "Read() failed on PB_CENT_CR_ERROR" );
            break;
        }

        uint32_t reasonCode = reg->GetBitFieldJustified(0,3);
        if ( 0x02 != reasonCode ) // Must be 0b010 to continue
        {
            PRDF_ERR( PRDF_FUNC "Unsupported reason code: 0x%02x",
                      reasonCode );
            l_rc = FAIL; break;
        }

        uint32_t crEncoding = reg->GetBitFieldJustified(38,5);
        if ( 0x00 != crEncoding ) // Must be 0b00000 to continue
        {
            PRDF_ERR( PRDF_FUNC "Unsupported combined response encoding: 0x%02x",
                      crEncoding );
            l_rc = FAIL; break;
        }

        if ( reg->IsBitSet(22) ) // Must be 0b0 to continue
        {
            PRDF_ERR( PRDF_FUNC "Operation not sourced by an EX chiplet" );
            l_rc = FAIL; break;
        }

        // Get the EX target
        uint32_t exPos = reg->GetBitFieldJustified(23,4);
        TargetHandle_t exTrgt = getConnectedChild( procTrgt, TYPE_EX, exPos );
        if ( NULL == exTrgt )
        {
            // exPos is unsigned, so it is printed with %u.
            PRDF_ERR( PRDF_FUNC "No connected EX chiplet at position %u",
                      exPos );
            l_rc = FAIL; break;
        }

        // Callout the EX target
        io_sc.service_data->SetCallout( exTrgt, MRU_LOW );

    } while (0);

    // Any failure above falls back to calling out the processor itself.
    if ( SUCCESS != l_rc )
    {
        PRDF_ERR( PRDF_FUNC "Unable to isolate to an EX chiplet. Calling out "
                  "PROC 0x%08x instead.", i_chip->GetId() );

        io_sc.service_data->SetCallout( procTrgt, MRU_LOW );
    }

    return SUCCESS;

    #undef PRDF_FUNC
}
Ejemplo n.º 22
0
/**
 * @brief  One-time cleanup after a channel failure on a membuf. Masks the
 *         related attentions on the connected proc and on the membuf itself,
 *         then processes every MBA chip found in the membuf data bundle.
 * @param  i_mbChip The membuf chip. Must be non-NULL and of TYPE_MEMBUF.
 * @param  i_sc     The step code data struct.
 * @return SUCCESS if everything worked. FAIL on invalid input or when the MCS
 *         or proc cannot be found. Otherwise the OR of the return codes of the
 *         individual cleanup steps.
 * @note   Nothing is done unless the service data reports a unit CS, the
 *         primary attention type is not CHECK_STOP, and the data bundle still
 *         has iv_doChnlFailCleanup set.
 */
int32_t chnlCsCleanup( ExtensibleChip *i_mbChip,
                       STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[MemUtils::chnlCsCleanup] "

    int32_t o_rc = SUCCESS;

    do
    {
        // The chip handle is only queried when the pointer itself is valid.
        const bool validChip = ( NULL != i_mbChip ) &&
            ( TYPE_MEMBUF == getTargetType( i_mbChip->GetChipHandle() ) );
        if ( !validChip )
        {
            PRDF_ERR( PRDF_FUNC "Invalid parameters" );
            o_rc = FAIL; break;
        }

        // Only a unit CS whose primary attention type is not CHECK_STOP
        // requires this cleanup.
        const bool cleanupApplies = i_sc.service_data->IsUnitCS() &&
            ( CHECK_STOP != i_sc.service_data->getPrimaryAttnType() );
        if ( !cleanupApplies ) break;

        CenMembufDataBundle * membufBundle = getMembufDataBundle( i_mbChip );
        if ( !membufBundle->iv_doChnlFailCleanup )
            break; // The cleanup was already performed earlier.

        // Flag this as the SUE generation point.
        i_sc.service_data->SetFlag( ServiceDataCollector::UERE );

        // Find the MCS attached to this membuf and the proc that owns it.
        ExtensibleChip * mcsChip = membufBundle->getMcsChip();
        if ( NULL == mcsChip )
        {
            PRDF_ERR( PRDF_FUNC "MCS chip is NULL for Membuf:0x%08X",
                      i_mbChip->GetId() );
            o_rc = FAIL; break;
        }

        TargetHandle_t mcsTrgt  = mcsChip->GetChipHandle();
        uint8_t        mcsPos   = getTargetPosition( mcsTrgt );
        TargetHandle_t procTrgt = getParentChip( mcsTrgt );
        if ( NULL == procTrgt )
        {
            PRDF_ERR( PRDF_FUNC "Proc is NULL for Mcs:0x%08X",
                      getHuid( mcsTrgt ) );
            o_rc = FAIL; break;
        }

        ExtensibleChip * procChip =
                        (ExtensibleChip *)systemPtr->GetChip( procTrgt );
        if ( NULL == procChip )
        {
            PRDF_ERR( PRDF_FUNC "Can not find Proc chip for HUID:0x%08X",
                      getHuid( procTrgt) );
            o_rc = FAIL; break;
        }

        // From here on this is best-effort cleanup: SCOM failures are
        // accumulated in o_rc but do not stop the remaining steps.

        // Proc side: mask the attention coming from this MCS.
        SCAN_COMM_REGISTER_CLASS * procTpMask =
                            procChip->getRegister("TP_CHIPLET_FIR_MASK");
        o_rc |= procTpMask->Read();
        if ( SUCCESS == o_rc )
        {
            // Bits 5-12 map to attentions from MCS0-MCS7.
            procTpMask->SetBit( 5 + mcsPos );
            o_rc |= procTpMask->Write();
        }

        // Proc side: mask attentions from the Centaur. Positions 0-3 use
        // IOMCFIR_0, positions 4 and up use IOMCFIR_1.
        const bool inFirstIomc = ( mcsPos < 4 );
        const char * iomcMaskName = "IOMCFIR_1_MASK_OR";
        if ( inFirstIomc ) iomcMaskName = "IOMCFIR_0_MASK_OR";

        SCAN_COMM_REGISTER_CLASS * iomcMask =
                            procChip->getRegister( iomcMaskName );

        // Position of the Centaur relative to its IOMCFIR.
        const uint8_t iomcIdx = inFirstIomc ? mcsPos : ( mcsPos - 4 );

        // Each Centaur owns 8 bits in IOMCFIR (4 Centaurs per register on a
        // P system); the field of the first Centaur starts at bit 8.
        iomcMask->SetBitFieldJustified( 8 + ( iomcIdx * 8 ), 8, 0xff );
        o_rc |= iomcMask->Write();

        // Membuf side: fully mask the chiplet FIR/SPA mask registers. All
        // registers are fetched first, then written in the listed order.
        const char * const membufMaskNames[] =
        {
            "TP_CHIPLET_FIR_MASK",
            "NEST_CHIPLET_FIR_MASK",
            "MEM_CHIPLET_FIR_MASK",
            "MEM_CHIPLET_SPA_MASK"
        };
        const uint32_t numMembufMasks =
                    sizeof(membufMaskNames) / sizeof(membufMaskNames[0]);

        SCAN_COMM_REGISTER_CLASS * membufMasks[numMembufMasks];
        for ( uint32_t m = 0; m < numMembufMasks; m++ )
        {
            membufMasks[m] = i_mbChip->getRegister( membufMaskNames[m] );
        }
        for ( uint32_t m = 0; m < numMembufMasks; m++ )
        {
            membufMasks[m]->setAllBits();
            o_rc |= membufMasks[m]->Write();
        }

        // Handle every MBA chip that the bundle knows about.
        for ( uint32_t mbaIdx = 0; mbaIdx < MAX_MBA_PER_MEMBUF; mbaIdx++ )
        {
            ExtensibleChip * mbaChip = membufBundle->getMbaChip( mbaIdx );
            if ( NULL == mbaChip ) continue;

            TargetHandle_t mbaTrgt = mbaChip->GetChipHandle();
            if ( NULL == mbaTrgt ) continue;

            #if  defined(__HOSTBOOT_MODULE) && \
                !defined(__HOSTBOOT_RUNTIME)
            // Small piece of platform specific code; kept inline rather
            // than in a separate file.
            int32_t l_rc = mdiaSendEventMsg( mbaTrgt, MDIA::SKIP_MBA );
            if ( SUCCESS != l_rc )
            {
                PRDF_ERR( PRDF_FUNC "mdiaSendEventMsg(0x%08x, SKIP_MBA) "
                          "failed", getHuid( mbaTrgt ) );
                o_rc |= l_rc;
            }
            #else
            int32_t l_rc = DEALLOC::mbaGard( mbaChip  );
            if ( SUCCESS != l_rc )
            {
                PRDF_ERR( PRDF_FUNC "mbaGard failed. HUID: 0x%08x",
                          getHuid( mbaTrgt ) );
                o_rc |= l_rc;
            }
            #endif // __HOSTBOOT_MODULE
        }

        // Cleanup is complete and must not be repeated.
        membufBundle->iv_doChnlFailCleanup = false;

    } while(0);

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 23
0
/**
 * @brief  Reads the per-symbol CE counters of an MBA, returns every symbol
 *         whose count reached the threshold, and picks a chip mark candidate.
 * @param  i_mbaChip    The MBA chip.
 * @param  i_rank       The rank the statistics belong to.
 * @param  o_maintStats Symbols with count >= i_thr are appended here and the
 *                      list is sorted with sortSymDataCount.
 * @param  o_chipMark   Reset to an invalid symbol on entry. Set to a symbol
 *                      on the DRAM with the highest total count among DRAMs
 *                      with enough over-threshold symbols (1 for x4, 2
 *                      otherwise), if such a DRAM exists.
 * @param  i_thr        Count threshold; must be non-zero.
 * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise.
 */
int32_t collectCeStats( ExtensibleChip * i_mbaChip, const CenRank & i_rank,
                        MaintSymbols & o_maintStats, CenSymbol & o_chipMark,
                        uint8_t i_thr )
{
    #define PRDF_FUNC "[MemUtils::collectCeStats] "

    int32_t o_rc = SUCCESS;

    // Start out with an invalid chip mark.
    o_chipMark = CenSymbol();

    do
    {
        // A zero threshold is not allowed.
        if ( 0 == i_thr )
        {
            PRDF_ERR( PRDF_FUNC "i_thr %d is invalid", i_thr );
            o_rc = FAIL; break;
        }

        TargetHandle_t mbaTrgt = i_mbaChip->GetChipHandle();

        // The CE statistic registers are accessed through the membuf chip.
        ExtensibleChip * membChip = getMbaDataBundle( i_mbaChip )->getMembChip();
        if ( NULL == membChip )
        {
            PRDF_ERR( PRDF_FUNC "getMembChip() failed" );
            o_rc = FAIL; break;
        }

        // The MBA position selects the row of the register name table.
        uint8_t mbaPos = getTargetPosition( mbaTrgt );
        if ( mbaPos >= MAX_MBA_PER_MEMBUF )
        {
            PRDF_ERR( PRDF_FUNC "mbaPos %d is invalid", mbaPos );
            o_rc = FAIL; break;
        }

        const bool isX4 = isDramWidthX4(mbaTrgt);

        // Query the spares currently in use on this rank.
        CenSymbol sp0, sp1, ecc;
        o_rc = mssGetSteerMux( mbaTrgt, i_rank, sp0, sp1, ecc );
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed." );
            break;
        }

        // Per-DRAM totals, keyed by DRAM.
        DramCountMap dramCounts;

        // Walk all CE registers of this MBA. The loop also stops as soon as
        // the inner loop has recorded a failure in o_rc.
        for ( uint8_t regIdx = 0;
              (regIdx < CE_REGS_PER_MBA) && (SUCCESS == o_rc); regIdx++ )
        {
            const char * regName = mbsCeStatReg[mbaPos][regIdx];
            SCAN_COMM_REGISTER_CLASS * ceReg = membChip->getRegister( regName );

            o_rc = ceReg->Read();
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "Read() failed on %s", regName );
                break;
            }

            // Symbol number of the first counter held in this register.
            const uint8_t firstSym = SYMBOLS_PER_CE_REG * regIdx;

            // Every symbol has an 8-bit counter.
            for ( uint8_t idx = 0; idx < SYMBOLS_PER_CE_REG; idx++ )
            {
                const uint8_t ceCount =
                                ceReg->GetBitFieldJustified( (idx*8), 8 );
                if ( 0 == ceCount ) continue; // no errors on this symbol

                const uint8_t sym  = firstSym + idx;
                const uint8_t dram = symbol2Dram( sym, isX4 );

                // Every non-zero count contributes to the DRAM total.
                dramCounts[dram].totalCount += ceCount;

                // Only symbols at or above threshold go any further.
                if ( ceCount < i_thr ) continue;

                // One more over-threshold symbol on this DRAM.
                dramCounts[dram].symbolCount++;

                SymbolData entry;
                entry.symbol = CenSymbol::fromSymbol( mbaTrgt, i_rank, sym,
                                            CEN_SYMBOL::BOTH_SYMBOL_DQS );
                if ( !entry.symbol.isValid() )
                {
                    PRDF_ERR( PRDF_FUNC "CenSymbol() failed: symbol=%d",
                              sym );
                    o_rc = FAIL;
                    break;
                }

                // Mark the symbol if its DRAM matches one of the spares.
                const bool onDramSpare =
                    ( sp0.isValid() &&
                      (sp0.getDram() == entry.symbol.getDram()) ) ||
                    ( sp1.isValid() &&
                      (sp1.getDram() == entry.symbol.getDram()) );
                if ( onDramSpare )
                {
                    entry.symbol.setDramSpared();
                }

                const bool onEccSpare =
                    ecc.isValid() &&
                    (ecc.getDram() == entry.symbol.getDram());
                if ( onEccSpare )
                {
                    entry.symbol.setEccSpared();
                }

                // Record the symbol together with its count.
                entry.count = ceCount;
                o_maintStats.push_back( entry );
            }
        }
        if ( SUCCESS != o_rc ) break;

        // Without any over-threshold symbol there is nothing left to do.
        if ( o_maintStats.empty() ) break;

        // Order the collected symbols.
        std::sort( o_maintStats.begin(), o_maintStats.end(), sortSymDataCount );

        // Look for the DRAM with the highest total count among the DRAMs
        // that have at least minSymbols over-threshold symbols.
        const uint32_t minSymbols = isX4 ? 1 : 2;
        uint32_t bestDram  = 0;
        uint32_t bestTotal = 0;
        for ( DramCountMap::iterator dramIt = dramCounts.begin();
              dramIt != dramCounts.end(); ++dramIt )
        {
            if ( dramIt->second.symbolCount < minSymbols ) continue;

            if ( bestTotal < dramIt->second.totalCount )
            {
                bestDram  = dramIt->first;
                bestTotal = dramIt->second.totalCount;
            }
        }

        if ( 0 == bestTotal ) break; // no DRAM qualified

        uint8_t markSym = dram2Symbol( bestDram, isX4 );
        o_chipMark = CenSymbol::fromSymbol( mbaTrgt, i_rank, markSym );

        // Mark the chip mark if its DRAM matches one of the spares.
        if ( ( sp0.isValid() && (sp0.getDram() == o_chipMark.getDram()) ) ||
             ( sp1.isValid() && (sp1.getDram() == o_chipMark.getDram()) ) )
        {
            o_chipMark.setDramSpared();
        }
        if ( ecc.isValid() && (ecc.getDram() == o_chipMark.getDram()) )
        {
            o_chipMark.setEccSpared();
        }

    } while(0);

    if ( SUCCESS != o_rc )
    {
        PRDF_ERR( PRDF_FUNC "Failed: i_mbaChip=0x%08x i_rank=m%ds%d i_thr=%d",
                  i_mbaChip->GetId(), i_rank.getMaster(), i_rank.getSlave(),
                  i_thr );
    }

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 24
0
int32_t CenMbaTdCtlrCommon::checkEccErrors( uint16_t & o_eccErrorMask,
                                            STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[CenMbaTdCtlrCommon::checkEccErrors] "

    int32_t o_rc = SUCCESS;

    o_eccErrorMask = NO_ERROR;

    do
    {
        const char * reg_str = (0 == iv_mbaPos) ? "MBA0_MBSECCFIR"
                                                : "MBA1_MBSECCFIR";
        SCAN_COMM_REGISTER_CLASS * mbsEccFir
                                        = iv_membChip->getRegister( reg_str );
        o_rc = mbsEccFir->Read();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str );
            break;
        }

        if ( mbsEccFir->IsBitSet(20 + iv_rank.getMaster()) )
        {
            o_eccErrorMask |= MPE;
            io_sc.service_data->AddSignatureList(iv_mbaTrgt, PRDFSIG_MaintMPE);

            // Clean up side-effect FIRs that may be set due to the chip mark.
            o_rc = chipMarkCleanup();
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "chipMarkCleanup() failed" );
                break;
            }
        }

        if ( mbsEccFir->IsBitSet(38) )
        {
            // No need to add error signature. MCE is not error. It will be
            // handled only in VCM/DSD phase 2.
            o_eccErrorMask |= MCE;
        }

        if ( mbsEccFir->IsBitSet(41) )
        {
            o_eccErrorMask |= UE;
            io_sc.service_data->AddSignatureList( iv_mbaTrgt, PRDFSIG_MaintUE );
        }

        SCAN_COMM_REGISTER_CLASS * mbaSpaFir =
                            iv_mbaChip->getRegister("MBASPA");
        o_rc = mbaSpaFir->Read();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "Failed to read MBASPA Regsiter");
            break;
        }

        if ( mbaSpaFir->IsBitSet(1) )
        {
            o_eccErrorMask |= HARD_CTE;
            io_sc.service_data->AddSignatureList( iv_mbaTrgt,
                                                  PRDFSIG_MaintHARD_CTE );
        }

        if ( mbaSpaFir->IsBitSet(2) )
        {
            o_eccErrorMask |= SOFT_CTE;
            io_sc.service_data->AddSignatureList( iv_mbaTrgt,
                                                  PRDFSIG_MaintSOFT_CTE );
        }

        if ( mbaSpaFir->IsBitSet(3) )
        {
            o_eccErrorMask |= INTER_CTE;
            io_sc.service_data->AddSignatureList( iv_mbaTrgt,
                                                  PRDFSIG_MaintINTER_CTE );
        }

        if ( mbaSpaFir->IsBitSet(4) )
        {
            o_eccErrorMask |= RETRY_CTE;
            io_sc.service_data->AddSignatureList( iv_mbaTrgt,
                                                  PRDFSIG_MaintRETRY_CTE );
        }

    } while(0);

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 25
0
/**
 * @brief Analysis code that is called before the main analyze() function.
 *        Adds capture data when an MCS chip is known and flags a unit
 *        checkstop if GLOBAL_CS_FIR is non-zero.
 * @param i_mbChip   A MEMBUF chip.
 * @param i_sc       Step Code Data structure.
 * @param o_analyzed Always set to false by this function.
 * @return Non-SUCCESS if GLOBAL_CS_FIR could not be read, SUCCESS otherwise.
 */
int32_t PreAnalysis( ExtensibleChip * i_mbChip, STEP_CODE_DATA_STRUCT & i_sc,
                     bool & o_analyzed )
{
    #define PRDF_FUNC "[Membuf::PreAnalysis] "

    o_analyzed = false;

    int32_t o_rc = SUCCESS;

    // Collect the memory capture data. This requires the connected MCS chip.
    CaptureData & captureData = i_sc.service_data->GetCaptureData();
    ExtensibleChip * mcsChip = getMembufDataBundle( i_mbChip )->getMcsChip();
    if ( NULL != mcsChip )
    {
        mcsChip->CaptureErrorData( captureData, Util::hashString("FirRegs") );
        mcsChip->CaptureErrorData( captureData, Util::hashString("CerrRegs") );

        CenMbaCaptureData::addMemChipletFirRegs( i_mbChip, captureData );
    }

    // The Centaur checkstop check is skipped in two cases:
    //  - A unit checkstop is already being analyzed.
    //  - A special attention is being analyzed. This covers a rare scenario
    //    where the Centaur CS bit comes up after attention has called PRD
    //    and PRD was still at the start of analysis.
    const bool skipCsCheck =
        i_sc.service_data->GetFlag(ServiceDataCollector::UNIT_CS) ||
        ( SPECIAL == i_sc.service_data->GetAttentionType() );

    if ( !skipCsCheck )
    {
        // MCIFIR[31] is not always reliable if the unit CS originated on the
        // Centaur, because packets may not get forwarded to the MCS. A
        // non-zero GLOBAL_CS_FIR is used as the indicator instead.
        SCAN_COMM_REGISTER_CLASS * globalCsFir =
                                    i_mbChip->getRegister("GLOBAL_CS_FIR");
        o_rc = globalCsFir->Read();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC"Failed to read GLOBAL_CS_FIR on 0x%08x",
                      i_mbChip->GetId() );
        }
        else if ( !globalCsFir->BitStringIsZero() )
        {
            // Unit checkstop found: set the flag, the threshold mask id and
            // the cause attention type.
            i_sc.service_data->SetFlag(ServiceDataCollector::UNIT_CS);
            i_sc.service_data->SetThresholdMaskId(0);
            i_sc.service_data->SetCauseAttentionType(UNIT_CS);
        }
    }

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 26
0
/**
 * @brief  MBSECCFIR[16] - Fetch New CE (NCE).
 *
 * Calls out the failing rank (chip level below 0x20) or the failing symbol
 * (chip level 0x20 and above), adds the error to the CE table, checks the
 * MNFG thresholds when in manufacturing mode and requests a TPS procedure
 * when a threshold was reached.
 *
 * @param  i_membChip A Centaur chip.
 * @param  i_sc       The step code data struct.
 * @param  i_mbaPos   The MBA position.
 * @return SUCCESS always. Internal failures are traced and handled with
 *         CalloutUtil::defaultError().
 */
int32_t AnalyzeFetchNce( ExtensibleChip * i_membChip,
                         STEP_CODE_DATA_STRUCT & i_sc, uint32_t i_mbaPos )
{
    #define PRDF_FUNC "[AnalyzeFetchNce] "

    int32_t l_rc = SUCCESS;

    ExtensibleChip * mbaChip = NULL;

    do
    {
        CenMembufDataBundle * membdb = getMembufDataBundle( i_membChip );
        mbaChip = membdb->getMbaChip( i_mbaPos );
        if ( NULL == mbaChip )
        {
            PRDF_ERR( PRDF_FUNC "getMbaChip() returned NULL" );
            l_rc = FAIL; break;
        }
        TargetHandle_t mbaTrgt = mbaChip->GetChipHandle();

        // Get the address (and with it the rank) of the failing read.
        CenAddr addr;
        l_rc = getCenReadAddr( i_membChip, i_mbaPos, READ_NCE_ADDR, addr );
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC "getCenReadAddr() failed" );
            break;
        }
        CenRank rank = addr.getRank();

        if ( 0x20 > getChipLevel(i_membChip->GetChipHandle()) )
        {
            // There is a bug in DD1.x where the value of MBSEVR cannot be
            // trusted. The workaround is too complicated for its value so
            // callout the rank instead.
            MemoryMru memmru ( mbaTrgt, rank, MemoryMruData::CALLOUT_RANK );
            i_sc.service_data->SetCallout( memmru );
        }
        else // DD2.0+
        {
            // Get the failing symbol
            const char * reg_str = (0 == i_mbaPos) ? "MBA0_MBSEVR"
                                                   : "MBA1_MBSEVR";
            SCAN_COMM_REGISTER_CLASS * reg = i_membChip->getRegister(reg_str);
            l_rc = reg->Read();
            if ( SUCCESS != l_rc )
            {
                PRDF_ERR( PRDF_FUNC "Read() failed on %s", reg_str );
                break;
            }

            uint8_t galois = reg->GetBitFieldJustified( 40, 8 );
            uint8_t mask   = reg->GetBitFieldJustified( 32, 8 );

            CenSymbol symbol = CenSymbol::fromGalois( mbaTrgt, rank, galois,
                                                      mask );
            if ( !symbol.isValid() )
            {
                PRDF_ERR( PRDF_FUNC "Failed to create symbol: galois=0x%02x "
                          "mask=0x%02x", galois, mask );

                // No callout has been made yet on this path. Report the
                // failure so the default error handling after the loop runs
                // instead of silently returning without any callout.
                l_rc = FAIL;
                break;
            }

            // Check if this symbol is on any of the spares.
            CenSymbol sp0, sp1, ecc;
            l_rc = mssGetSteerMux( mbaTrgt, rank, sp0, sp1, ecc );
            if ( SUCCESS != l_rc )
            {
                PRDF_ERR( PRDF_FUNC "mssGetSteerMux() failed. HUID: 0x%08x "
                        "rank: %d", getHuid(mbaTrgt), rank.getMaster() );
                break;
            }
            if ( (sp0.isValid() && (sp0.getDram() == symbol.getDram())) ||
                 (sp1.isValid() && (sp1.getDram() == symbol.getDram())) )
            {
                symbol.setDramSpared();
            }
            if ( ecc.isValid() && (ecc.getDram() == symbol.getDram()) )
            {
                symbol.setEccSpared();
            }

            // Add the DIMM to the callout list
            MemoryMru memmru ( mbaTrgt, rank, symbol );
            i_sc.service_data->SetCallout( memmru, MRU_MEDA );

            // Add to CE table. Any result other than NO_TH_REACHED requests a
            // TPS procedure.
            CenMbaDataBundle * mbadb = getMbaDataBundle( mbaChip );
            uint32_t ceTableRc = mbadb->iv_ceTable.addEntry( addr, symbol );
            bool doTps = ( CenMbaCeTable::NO_TH_REACHED != ceTableRc );

            // Check MNFG thresholds, if needed.
            if ( mfgMode() )
            {
                // Get the MNFG CE thresholds.
                uint16_t dramTh, hrTh, dimmTh;
                l_rc = getMnfgMemCeTh( mbaChip, rank, dramTh, hrTh, dimmTh );
                if ( SUCCESS != l_rc )
                {
                    PRDF_ERR( PRDF_FUNC "getMnfgMemCeTh() failed: rank=m%ds%d",
                              rank.getMaster(), rank.getSlave() );
                    break;
                }

                // Get counts from CE table.
                uint32_t dramCount, hrCount, dimmCount;
                mbadb->iv_ceTable.getMnfgCounts( rank, symbol,
                                                 dramCount, hrCount,
                                                 dimmCount );

                // Only the first matching condition is reported.
                if ( dramTh < dramCount )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgDramCte );
                    i_sc.service_data->SetServiceCall();
                    doTps = true;
                }
                else if ( hrTh < hrCount )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgHrCte );
                    i_sc.service_data->SetServiceCall();
                    doTps = true;
                }
                else if ( dimmTh < dimmCount )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgDimmCte );
                    i_sc.service_data->SetServiceCall();
                    doTps = true;
                }
                else if ( 0 != (CenMbaCeTable::TABLE_FULL & ceTableRc) )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgTableFull);

                    // The table is full and no other threshold has been met.
                    // We are in a state where we may never hit a MNFG
                    // threshold. Callout all memory behind the MBA. Also, since
                    // the counts are all over the place, there may be a problem
                    // with the MBA. So call it out as well.
                    MemoryMru all_mm ( mbaTrgt, rank,
                                       MemoryMruData::CALLOUT_ALL_MEM );
                    i_sc.service_data->SetCallout( all_mm,  MRU_MEDA );
                    i_sc.service_data->SetCallout( mbaTrgt, MRU_MEDA );
                    i_sc.service_data->SetServiceCall();
                }
                else if ( 0 != (CenMbaCeTable::ENTRY_TH_REACHED & ceTableRc) )
                {
                    i_sc.service_data->AddSignatureList( mbaTrgt,
                                                         PRDFSIG_MnfgEntryCte );

                    // There is a single entry threshold and no other threshold
                    // has been met. This is a potential flooding issue, so make
                    // the DIMM callout predictive.
                    i_sc.service_data->SetServiceCall();
                }
            }

            // Initiate a TPS procedure, if needed.
            if ( doTps )
            {
                // If a MNFG threshold has been reached (predictive callout), we
                // will still try to start TPS just in case MNFG disables the
                // termination policy.

                // Will not be able to do TPS during hostboot. Note that we will
                // still call handleTdEvent() so we can get the trace statement
                // indicating TPS was requested during Hostboot.

                l_rc = mbadb->iv_tdCtlr.handleTdEvent( i_sc, rank,
                                                CenMbaTdCtlrCommon::TPS_EVENT );
                if ( SUCCESS != l_rc )
                {
                    PRDF_ERR( PRDF_FUNC "handleTdEvent() failed: rank=m%ds%d",
                              rank.getMaster(), rank.getSlave() );
                    break;
                }
            }
        }

    } while (0);

    // Add ECC capture data for FFDC.
    if ( NULL != mbaChip )
        CenMbaCaptureData::addMemEccData( mbaChip, i_sc );

    // Any internal failure falls back to the default error callout.
    if ( SUCCESS != l_rc )
    {
        PRDF_ERR( PRDF_FUNC "Failed: i_membChip=0x%08x i_mbaPos=%d",
                  i_membChip->GetId(), i_mbaPos );
        CalloutUtil::defaultError( i_sc );
    }

    return SUCCESS; // Intentionally return SUCCESS for this plugin

    #undef PRDF_FUNC
}
Ejemplo n.º 27
0
int32_t CenMbaTdCtlrCommon::prepareNextCmd( bool i_clearStats )
{
    #define PRDF_FUNC "[CenMbaTdCtlrCommon::prepareNextCmd] "

    int32_t o_rc = SUCCESS;

    do
    {
        //----------------------------------------------------------------------
        // Clean up previous command
        //----------------------------------------------------------------------

        o_rc = cleanupPrevCmd();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "cleanupPrevCmd() failed" );
            break;
        }

        //----------------------------------------------------------------------
        // Clear ECC counters
        //----------------------------------------------------------------------

        const char * reg_str = NULL;

        if ( i_clearStats )
        {
            reg_str = (0 == iv_mbaPos) ? "MBA0_MBSTR" : "MBA1_MBSTR";
            SCAN_COMM_REGISTER_CLASS * mbstr =
                                    iv_membChip->getRegister( reg_str );

            // MBSTR's content could be modified from cleanupCmd()
            // so we need to refresh
            o_rc = mbstr->ForceRead();
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "ForceRead() failed on %s", reg_str );
                break;
            }

            mbstr->SetBit(53); // Setting this bit clears all counters.

            o_rc = mbstr->Write();
            if ( SUCCESS != o_rc )
            {
                PRDF_ERR( PRDF_FUNC "Write() failed on %s", reg_str );
                break;
            }

            // Hardware automatically clears bit 53, so flush this register out
            // of the register cache to avoid clearing the counters again with
            // a write from the out-of-date cached copy.
            RegDataCache & cache = RegDataCache::getCachedRegisters();
            cache.flush( iv_membChip, mbstr );
        }

        //----------------------------------------------------------------------
        // Clear ECC FIRs
        //----------------------------------------------------------------------

        reg_str = (0 == iv_mbaPos) ? "MBA0_MBSECCFIR_AND"
                                   : "MBA1_MBSECCFIR_AND";
        SCAN_COMM_REGISTER_CLASS * firand = iv_membChip->getRegister( reg_str );
        firand->setAllBits();

        // Clear all scrub MPE bits.
        // This will need to be done when starting a TD procedure or background
        // scrubbing. iv_rank may not be set when starting background scrubbing
        // and technically there should only be one of these MPE bits on at a
        // time so we should not have to worry about losing an attention by
        // clearing them all.
        firand->SetBitFieldJustified( 20, 8, 0 );

        // Clear scrub NCE, SCE, MCE, RCE, SUE, UE bits (36-41)
        firand->SetBitFieldJustified( 36, 6, 0 );

        o_rc = firand->Write();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "Write() failed on %s", reg_str );
            break;
        }

        SCAN_COMM_REGISTER_CLASS * spaAnd =
                                iv_mbaChip->getRegister("MBASPA_AND");
        spaAnd->setAllBits();

        // Clear threshold exceeded attentions
        spaAnd->SetBitFieldJustified( 1, 4, 0 );

        o_rc = spaAnd->Write();
        if ( SUCCESS != o_rc )
        {
            PRDF_ERR( PRDF_FUNC "Write() failed on MBASPA_AND" );
            break;
        }

    } while (0);

    return o_rc;

    #undef PRDF_FUNC
}
Ejemplo n.º 28
0
/**
 * @fn ClearMbsSecondaryBits
 * @brief Clears MBS secondary Fir bits which may come up because of primary
 *        MBS/MBI FIR bits.
 *
 * MBSFIR[26] is cleared when MBSFIR[9] is also set and not masked.
 * MBSFIR[3] and MBSFIR[4] are cleared when MBIFIR[0] is set and not masked.
 *
 * @param  i_chip       The Centaur chip.
 * @param  i_sc         ServiceDataColector.
 * @return SUCCESS always; failures are only traced.
 */
int32_t ClearMbsSecondaryBits( ExtensibleChip * i_chip,
                               STEP_CODE_DATA_STRUCT & i_sc  )
{
    #define PRDF_FUNC "[ClearMbsSecondaryBits] "

    int32_t l_rc = SUCCESS;
    do
    {
        SCAN_COMM_REGISTER_CLASS * mbsFir = i_chip->getRegister("MBSFIR");
        SCAN_COMM_REGISTER_CLASS * mbsFirMask =
                                        i_chip->getRegister("MBSFIR_MASK");
        SCAN_COMM_REGISTER_CLASS * mbsFirAnd =
                                        i_chip->getRegister("MBSFIR_AND");
        l_rc = mbsFir->Read();
        l_rc |= mbsFirMask->Read();
        if ( SUCCESS != l_rc )
        {
            // Note the separating space: adjacent literals are concatenated.
            PRDF_ERR( PRDF_FUNC "MBSFIR/MBSFIR_MASK read failed "
                      "for 0x%08x", i_chip->GetId());
            break;
        }

        // Start with all ones so that the AND write leaves every bit alone
        // unless it is explicitly cleared below.
        mbsFirAnd->setAllBits();

        // Bit 26 is cleared only when bit 9 is set and unmasked.
        if ( mbsFir->IsBitSet(26)
             && mbsFir->IsBitSet(9) && ( ! mbsFirMask->IsBitSet(9)))
        {
            mbsFirAnd->ClearBit(26);
        }

        // Bits 3 and 4 are cleared only when MBIFIR[0] is set and unmasked.
        if( mbsFir->IsBitSet(3) ||  mbsFir->IsBitSet(4) )
        {
            SCAN_COMM_REGISTER_CLASS * mbiFir = i_chip->getRegister("MBIFIR");
            SCAN_COMM_REGISTER_CLASS * mbiFirMask =
                                            i_chip->getRegister("MBIFIR_MASK");
            l_rc = mbiFir->Read();
            l_rc |= mbiFirMask->Read();
            if ( SUCCESS != l_rc )
            {
                // Do not break from here, just print error trace.
                // If there are other secondary bits ( e.g. 26, 27 ),
                // we want to clear them.
                PRDF_ERR( PRDF_FUNC "MBIFIR/MASK read failed "
                          "for 0x%08x", i_chip->GetId());
            }
            else if ( mbiFir->IsBitSet( 0 ) && ( ! mbiFirMask->IsBitSet( 0 )) )
            {
                mbsFirAnd->ClearBit(3);
                mbsFirAnd->ClearBit(4);
            }
        }

        l_rc = mbsFirAnd->Write();
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC "MBSFIR_AND write failed "
                      "for 0x%08x", i_chip->GetId());
            break;
        }

    }while( 0 );

    // Failures are traced above and deliberately not returned.
    return SUCCESS;

    #undef PRDF_FUNC
} PRDF_PLUGIN_DEFINE( Membuf, ClearMbsSecondaryBits );
Ejemplo n.º 29
0
/**
 * @fn ClearMbaCalSecondaryBits
 * @brief Clears MBACAL secondary Fir bits which may come up because of MBSFIR
 *
 * For every MBA chip of the membuf, MBACALFIR[10] and MBACALFIR[14] are
 * cleared if at least one of them is set.
 *
 * @param  i_chip       The Centaur chip.
 * @param  i_sc         ServiceDataColector.
 * @return SUCCESS always; failures are only traced.
 */
int32_t ClearMbaCalSecondaryBits( ExtensibleChip * i_chip,
                                  STEP_CODE_DATA_STRUCT & i_sc  )
{
    #define PRDF_FUNC "[ClearMbaCalSecondaryBits ] "
    int32_t l_rc = SUCCESS;

    do
    {
        // NOTE(review): MBSFIR and MBSFIR_MASK are read here, but their
        // contents are never consulted below. Only a read failure has an
        // effect (nothing is cleared). Confirm whether a check of specific
        // MBSFIR bits was intended.
        SCAN_COMM_REGISTER_CLASS * mbsFir = i_chip->getRegister("MBSFIR");
        SCAN_COMM_REGISTER_CLASS * mbsFirMask =
                                        i_chip->getRegister("MBSFIR_MASK");
        l_rc = mbsFir->Read();
        l_rc |= mbsFirMask->Read();
        if ( SUCCESS != l_rc )
        {
            // Note the separating space: adjacent literals are concatenated.
            PRDF_ERR( PRDF_FUNC "MBSFIR/MBSFIR_MASK read failed "
                      "for 0x%08x", i_chip->GetId());
            break;
        }

        CenMembufDataBundle * membdb = getMembufDataBundle( i_chip );

        for( uint32_t i = 0; i < MAX_MBA_PER_MEMBUF; i++ )
        {
            ExtensibleChip * mbaChip = membdb->getMbaChip(i);
            if ( NULL == mbaChip ) continue;

            SCAN_COMM_REGISTER_CLASS * mbaCalFir =
                                mbaChip->getRegister("MBACALFIR");

            if( SUCCESS != mbaCalFir->Read() )
            {
                // Do not break. Just print error trace and look for
                // other MBA.
                PRDF_ERR( PRDF_FUNC "MBACALFIR read failed "
                          "for 0x%08x", mbaChip->GetId());
                continue;
            }

            // Nothing to clear on this MBA.
            if( !( mbaCalFir->IsBitSet( 10 ) || mbaCalFir->IsBitSet( 14 ) ))
                continue;

            SCAN_COMM_REGISTER_CLASS * mbaCalAndFir =
                                mbaChip->getRegister("MBACALFIR_AND");

            // All ones except bits 10 and 14, so that the AND write clears
            // only these two bits.
            mbaCalAndFir->setAllBits();

            mbaCalAndFir->ClearBit(10);
            mbaCalAndFir->ClearBit(14);

            l_rc = mbaCalAndFir->Write();
            if ( SUCCESS != l_rc )
            {
                // Do not break. Just print error trace and look for
                // other MBA.
                PRDF_ERR( PRDF_FUNC "MBACALFIR_AND write failed "
                          "for 0x%08x", mbaChip->GetId());
            }
        }

    }while( 0 );

    // Failures are traced above and deliberately not returned.
    return SUCCESS;
    #undef PRDF_FUNC

} PRDF_PLUGIN_DEFINE( Membuf, ClearMbaCalSecondaryBits );
Ejemplo n.º 30
0
/**
  * @brief  Clear the PLL error indications for the P8 plugin.
  *
  * Nothing is done when the primary attention type is CHECK_STOP. Otherwise
  * the following steps are all attempted, even if an earlier one failed:
  *   - PCI_ERROR_REG: if PLL_ERROR_BIT is on, the register is written with
  *     only that bit set.
  *   - TP_LFIR_AND: written with all bits set except PLL_DETECT_P8.
  *   - OSCERR: read, bits 4 and 5 cleared, written back.
  *
  * A register whose read failed is neither modified nor written, because its
  * contents are not known to reflect hardware.
  *
  * @param  i_chip P8 chip
  * @param  i_sc   The step code data struct
  * @returns SUCCESS if nothing failed; otherwise the bitwise OR of the return
  *          codes of every register access that failed.
  */
int32_t ClearPllIo( ExtensibleChip * i_chip,
                        STEP_CODE_DATA_STRUCT & i_sc)
{
    #define PRDF_FUNC "[Proc::ClearPllIo] "

    int32_t rc = SUCCESS;

    if (CHECK_STOP != i_sc.service_data->getPrimaryAttnType())
    {
        int32_t tmpRC = SUCCESS;

        // Clear pci osc error reg bit
        SCAN_COMM_REGISTER_CLASS * pciErrReg =
                i_chip->getRegister("PCI_ERROR_REG");

        tmpRC = pciErrReg->Read();
        if (tmpRC != SUCCESS)
        {
            PRDF_ERR(PRDF_FUNC "PCI_ERROR_REG read failed "
                     "for chip: 0x%08x", i_chip->GetId());
            rc |= tmpRC;
        }
        else if( pciErrReg->IsBitSet( PLL_ERROR_BIT ) )
        {
            // Only evaluated after a successful read, so the decision is
            // based on real register contents.
            // NOTE(review): writing a lone 1 presumably clears the bit
            // (write-1-to-clear) - confirm against the register spec.
            pciErrReg->clearAllBits();
            pciErrReg->SetBit(PLL_ERROR_BIT);
            tmpRC = pciErrReg->Write();

            if ( SUCCESS != tmpRC )
            {
                PRDF_ERR( PRDF_FUNC "Write() failed on PCI Error register: "
                          "proc=0x%08x", i_chip->GetId() );
                rc |= tmpRC;
            }
        }

        // Clear TP_LFIR
        // The AND register is written with every bit on except the one to
        // clear, so no prior read is required.
        SCAN_COMM_REGISTER_CLASS * TP_LFIRand =
                   i_chip->getRegister("TP_LFIR_AND");
        TP_LFIRand->setAllBits();
        TP_LFIRand->ClearBit(PLL_DETECT_P8);
        tmpRC = TP_LFIRand->Write();
        if (tmpRC != SUCCESS)
        {
            PRDF_ERR(PRDF_FUNC "TP_LFIR_AND write failed "
                     "for chip: 0x%08x", i_chip->GetId());
            rc |= tmpRC;
        }

        // Clear bits 4 and 5 of OSCERR with a read-modify-write.
        SCAN_COMM_REGISTER_CLASS * oscCerrReg =
                i_chip->getRegister("OSCERR");

        tmpRC = oscCerrReg->Read();
        if (tmpRC != SUCCESS)
        {
            // Do not write back: without a good read the remaining bits
            // would be overwritten with unknown data.
            PRDF_ERR(PRDF_FUNC "OSCERR read failed "
                     "for 0x%08x", i_chip->GetId());
            rc |= tmpRC;
        }
        else
        {
            oscCerrReg->ClearBit(4);
            oscCerrReg->ClearBit(5);
            tmpRC = oscCerrReg->Write();
            if (tmpRC != SUCCESS)
            {
                PRDF_ERR(PRDF_FUNC "oscCerrReg write failed "
                         "for chip: 0x%08x", i_chip->GetId());
                rc |= tmpRC;
            }
        }

    }

    if( rc != SUCCESS )
    {
        PRDF_ERR(PRDF_FUNC "failed for proc: 0x%.8X",
                 i_chip->GetId());
    }

    return rc;

    #undef PRDF_FUNC
}