Пример #1
0
/**
 * @brief Plugin that analyzes the connected MCS when one of its unmasked
 *        channel fail bits is active in MCIFIR.
 *
 * @param  i_membChip   The Centaur (membuf) chip.
 * @param  i_sc         The step code data struct (service data collector).
 *
 * @return SUCCESS only if an unmasked MCS channel fail bit was found and the
 *         MCS analysis itself returned SUCCESS. FAIL if there is no connected
 *         MCS chip, no unmasked channel fail bit, or the MCS analysis did not
 *         succeed. If reading MCIFIR/MCIFIR_MASK fails, the OR of the two read
 *         return codes (non-SUCCESS) is returned.
 */
int32_t handleMcsChnlCs( ExtensibleChip * i_membChip,
                         STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[handleMcsChnlCs] "

    // Any non-SUCCESS return tells the rule code to fall back to its
    // alternate resolution. That happens when no channel fail bit is on, or
    // when the MCS could not be analyzed properly.

    ExtensibleChip * mcs = getMembufDataBundle( i_membChip )->getMcsChip();
    if ( NULL == mcs )
        return FAIL;

    SCAN_COMM_REGISTER_CLASS * fir  = mcs->getRegister( "MCIFIR" );
    SCAN_COMM_REGISTER_CLASS * mask = mcs->getRegister( "MCIFIR_MASK" );

    // Both registers are always read; the return codes are OR'ed together so
    // a failure of either read is detected with a single check.
    int32_t readRc = fir->Read();
    readRc |= mask->Read();
    if ( SUCCESS != readRc )
    {
        PRDF_ERR( PRDF_FUNC "MCIFIR/MCIFIR_MASK read failed for 0x%08x",
                  mcs->GetId() );
        return readRc;
    }

    // The channel fail bit positions are hard wired in the hardware and
    // cannot move without a HW change, so a hard coded mask is safe here.
    // Channel fail bits: 0, 1, 6, 8, 9, 22, 23 and 40.
    const uint64_t chnlFailBits = 0xC2C0030000800000ull;
    const uint64_t firValue     = fir->GetBitFieldJustified( 0, 64 );
    const uint64_t maskValue    = mask->GetBitFieldJustified( 0, 64 );

    // Only channel fail bits that are on AND not masked count.
    const uint64_t activeChnlFails = firValue & ~maskValue & chnlFailBits;
    if ( 0 == activeChnlFails )
        return FAIL;

    const int32_t analyzeRc =
        mcs->Analyze( i_sc, i_sc.service_data->GetCauseAttentionType() );

    // Anything other than a clean MCS analysis is reported as FAIL.
    return ( SUCCESS == analyzeRc ) ? SUCCESS : FAIL;

    #undef PRDF_FUNC

} PRDF_PLUGIN_DEFINE( Membuf, handleMcsChnlCs );
Пример #2
0
/**
 * @brief Cleanup after a channel failure (unit checkstop) on a membuf.
 *
 * Does nothing (returns SUCCESS) unless the service data reports a unit
 * checkstop, the primary attention type is not CHECK_STOP, and the membuf's
 * data bundle still has iv_doChnlFailCleanup set. Otherwise it:
 *  - flags the service data with UERE (SUE generation point),
 *  - masks the MCS bit in the parent proc's TP_CHIPLET_FIR_MASK,
 *  - masks the Centaur's bits through the proc's IOMCFIR_x_MASK_OR,
 *  - sets all bits in the membuf's TP/NEST/MEM chiplet FIR masks and the
 *    MEM chiplet SPA mask,
 *  - for each MBA, sends an MDIA SKIP_MBA event (Hostboot IPL build) or
 *    calls DEALLOC::mbaGard (all other builds),
 *  - clears iv_doChnlFailCleanup so the cleanup is not repeated.
 *
 * @param  i_mbChip  A membuf chip.
 * @param  i_sc      The step code data struct.
 *
 * @return FAIL on invalid parameters or if the MCS/proc chips cannot be
 *         found. Otherwise the OR of the return codes from all cleanup
 *         operations (SUCCESS if none of them failed).
 */
int32_t chnlCsCleanup( ExtensibleChip *i_mbChip,
                       STEP_CODE_DATA_STRUCT & i_sc )
{
    #define PRDF_FUNC "[MemUtils::chnlCsCleanup] "

    const bool validMembuf =
        ( NULL != i_mbChip ) &&
        ( TYPE_MEMBUF == getTargetType( i_mbChip->GetChipHandle() ) );
    if ( !validMembuf )
    {
        PRDF_ERR( PRDF_FUNC "Invalid parameters" );
        return FAIL;
    }

    // Cleanup only applies to a unit checkstop that is not being analyzed
    // as part of a system checkstop.
    const bool cleanupApplies =
        i_sc.service_data->IsUnitCS() &&
        ( CHECK_STOP != i_sc.service_data->getPrimaryAttnType() );
    if ( !cleanupApplies )
        return SUCCESS;

    CenMembufDataBundle * bundle = getMembufDataBundle( i_mbChip );
    if ( !bundle->iv_doChnlFailCleanup )
        return SUCCESS; // Cleanup has already been done.

    // Mark this as the SUE generation point.
    i_sc.service_data->SetFlag( ServiceDataCollector::UERE );

    ExtensibleChip * mcsChip = bundle->getMcsChip();
    if ( NULL == mcsChip )
    {
        PRDF_ERR( PRDF_FUNC "MCS chip is NULL for Membuf:0x%08X",
                  i_mbChip->GetId() );
        return FAIL;
    }

    TargetHandle_t mcsTrgt  = mcsChip->GetChipHandle();
    uint8_t        mcsPos   = getTargetPosition( mcsTrgt );
    TargetHandle_t procTrgt = getParentChip( mcsTrgt );
    if ( NULL == procTrgt )
    {
        PRDF_ERR( PRDF_FUNC "Proc is NULL for Mcs:0x%08X", getHuid( mcsTrgt ) );
        return FAIL;
    }

    ExtensibleChip * procChip =
        (ExtensibleChip *)systemPtr->GetChip( procTrgt );
    if ( NULL == procChip )
    {
        PRDF_ERR( PRDF_FUNC "Can not find Proc chip for HUID:0x%08X",
                  getHuid( procTrgt ) );
        return FAIL;
    }

    // From here on this is best-effort cleanup: a failing scom operation is
    // recorded in the return code, but the remaining steps still run.
    int32_t o_rc = SUCCESS;

    // Mask the attention from this MCS on the proc.
    SCAN_COMM_REGISTER_CLASS * procTpMask =
        procChip->getRegister( "TP_CHIPLET_FIR_MASK" );
    o_rc |= procTpMask->Read();
    if ( SUCCESS == o_rc )
    {
        // Bits 5-12 map to attentions from MCS0-MCS7.
        procTpMask->SetBit( 5 + mcsPos );
        o_rc |= procTpMask->Write();
    }

    // Mask attentions from the Centaur. MCS positions below 4 use
    // IOMCFIR_0, the others use IOMCFIR_1.
    const bool   useFirstIomc = ( mcsPos < 4 );
    const char * iomcMaskName = "IOMCFIR_1_MASK_OR";
    if ( useFirstIomc )
        iomcMaskName = "IOMCFIR_0_MASK_OR";

    SCAN_COMM_REGISTER_CLASS * iomcMaskOr =
        procChip->getRegister( iomcMaskName );

    // Make the position relative to the selected IOMCFIR.
    if ( !useFirstIomc )
        mcsPos -= 4;

    // Each Centaur owns 8 bits in the IOMCFIR. There are 4 Centaurs (for a
    // P system) supported per register and the first Centaur's bits start
    // at bit 8.
    iomcMaskOr->SetBitFieldJustified( 8 + ( mcsPos * 8 ), 8, 0xff );
    o_rc |= iomcMaskOr->Write();

    // Mask everything on the Centaur itself. All registers are looked up
    // first and then written, in the order listed.
    const char * const cenMaskNames[] =
    {
        "TP_CHIPLET_FIR_MASK",
        "NEST_CHIPLET_FIR_MASK",
        "MEM_CHIPLET_FIR_MASK",
        "MEM_CHIPLET_SPA_MASK"
    };
    const uint32_t cenMaskCount =
        sizeof( cenMaskNames ) / sizeof( cenMaskNames[0] );

    SCAN_COMM_REGISTER_CLASS * cenMasks[cenMaskCount];
    for ( uint32_t r = 0; r < cenMaskCount; ++r )
    {
        cenMasks[r] = i_mbChip->getRegister( cenMaskNames[r] );
    }
    for ( uint32_t r = 0; r < cenMaskCount; ++r )
    {
        cenMasks[r]->setAllBits();
        o_rc |= cenMasks[r]->Write();
    }

    // Take every MBA behind this membuf out of service.
    for ( uint32_t mbaIdx = 0; mbaIdx < MAX_MBA_PER_MEMBUF; ++mbaIdx )
    {
        ExtensibleChip * mbaChip = bundle->getMbaChip( mbaIdx );
        if ( NULL == mbaChip )
            continue;

        TargetHandle_t mbaTrgt = mbaChip->GetChipHandle();
        if ( NULL == mbaTrgt )
            continue;

        #if  defined(__HOSTBOOT_MODULE) && \
            !defined(__HOSTBOOT_RUNTIME)
        // The platform specific part is tiny, so it lives here instead of
        // in a separate file.
        int32_t mbaRc = mdiaSendEventMsg( mbaTrgt, MDIA::SKIP_MBA );
        if ( SUCCESS != mbaRc )
        {
            PRDF_ERR( PRDF_FUNC "mdiaSendEventMsg(0x%08x, SKIP_MBA) "
                      "failed", getHuid( mbaTrgt ) );
            o_rc |= mbaRc;
        }
        #else
        int32_t mbaRc = DEALLOC::mbaGard( mbaChip );
        if ( SUCCESS != mbaRc )
        {
            PRDF_ERR( PRDF_FUNC "mbaGard failed. HUID: 0x%08x",
                      getHuid( mbaTrgt ) );
            o_rc |= mbaRc;
        }
        #endif // __HOSTBOOT_MODULE
    }

    // Cleanup is complete and is no longer required.
    bundle->iv_doChnlFailCleanup = false;

    return o_rc;

    #undef PRDF_FUNC
}
Пример #3
0
/**
 * @brief Analysis code that runs before the main analyze() function.
 *
 * Adds MCS and memory chiplet FIR capture data (when an MCS chip is
 * connected) and then checks GLOBAL_CS_FIR for a Centaur unit checkstop.
 * When one is found, the service data is flagged as UNIT_CS and the cause
 * attention type is set to UNIT_CS.
 *
 * @param i_mbChip   A MEMBUF chip.
 * @param i_sc       Step Code Data structure.
 * @param o_analyzed Always set to false by this function.
 * @return The non-SUCCESS return code of the GLOBAL_CS_FIR read if it failed,
 *         SUCCESS otherwise.
 */
int32_t PreAnalysis( ExtensibleChip * i_mbChip, STEP_CODE_DATA_STRUCT & i_sc,
                     bool & o_analyzed )
{
    #define PRDF_FUNC "[Membuf::PreAnalysis] "

    o_analyzed = false;

    // Collect memory capture data. Nothing is captured if no MCS chip is
    // connected.
    CaptureData & captureData = i_sc.service_data->GetCaptureData();
    ExtensibleChip * mcs = getMembufDataBundle( i_mbChip )->getMcsChip();
    if ( NULL != mcs )
    {
        mcs->CaptureErrorData( captureData, Util::hashString("FirRegs") );
        mcs->CaptureErrorData( captureData, Util::hashString("CerrRegs") );

        CenMbaCaptureData::addMemChipletFirRegs( i_mbChip, captureData );
    }

    // ---- Check for a Centaur checkstop ----

    // Nothing to do if a unit checkstop is already being analyzed.
    if ( i_sc.service_data->GetFlag( ServiceDataCollector::UNIT_CS ) )
        return SUCCESS;

    // Nothing to do for a special attention. This covers a rare scenario
    // where the Centaur CS bit comes up after the attention has called PRD
    // and PRD is still at the start of analysis.
    if ( SPECIAL == i_sc.service_data->GetAttentionType() )
        return SUCCESS;

    // MCIFIR[31] is not always reliable if the unit CS originated on the
    // Centaur, because packets may not get forwarded to the MCS. A non-zero
    // GLOBAL_CS_FIR is used as the indicator instead.
    SCAN_COMM_REGISTER_CLASS * globalCsFir =
        i_mbChip->getRegister( "GLOBAL_CS_FIR" );

    const int32_t readRc = globalCsFir->Read();
    if ( SUCCESS != readRc )
    {
        PRDF_ERR( PRDF_FUNC "Failed to read GLOBAL_CS_FIR on 0x%08x",
                  i_mbChip->GetId() );
        return readRc;
    }

    const bool unitCheckstop = !globalCsFir->BitStringIsZero();
    if ( unitCheckstop )
    {
        // Flag the unit checkstop and reset the threshold mask ID.
        i_sc.service_data->SetFlag( ServiceDataCollector::UNIT_CS );
        i_sc.service_data->SetThresholdMaskId( 0 );

        // Record the cause attention type.
        i_sc.service_data->SetCauseAttentionType( UNIT_CS );
    }

    return readRc;

    #undef PRDF_FUNC
}