Ejemplo n.º 1
0
int32_t handleLaneRepairEvent( ExtensibleChip * i_chip,
                               TYPE i_busType,
                               uint32_t i_busPos,
                               STEP_CODE_DATA_STRUCT & i_sc,
                               bool i_spareDeployed )
{
    #define PRDF_FUNC "[LaneRepair::handleLaneRepairEvent] "

    int32_t l_rc = SUCCESS;
    TargetHandle_t rxBusTgt = NULL;
    TargetHandle_t txBusTgt = NULL;
    bool thrExceeded = true;
    std::vector<uint8_t> rx_lanes;
    std::vector<uint8_t> rx_vpdLanes;
    std::vector<uint8_t> tx_vpdLanes;
    BitStringBuffer l_vpdLaneMap0to63(64);
    BitStringBuffer l_vpdLaneMap64to127(64);
    BitStringBuffer l_newLaneMap0to63(64);
    BitStringBuffer l_newLaneMap64to127(64);

    do
    {
        #ifdef __HOSTBOOT_MODULE
        if ( CHECK_STOP == i_sc.service_data->GetAttentionType() )
        {
            // This would only happen on OpenPOWER machines when we are doing
            // the post IPL analysis. In this case, we do not have the FFDC to
            // query the IO registers so simply set service call and skip
            // everything else.
            i_sc.service_data->SetServiceCall();
            return SUCCESS;
        }
        #endif

        // Get the RX and TX targets.
        l_rc = CalloutUtil::getBusEndpoints( i_chip, rxBusTgt, txBusTgt,
                                             i_busType, i_busPos );
        if ( SUCCESS != l_rc )
        {
            PRDF_ERR( PRDF_FUNC "getBusEndpoints() failed" );
            break;
        }

        // Call io_read_erepair
        l_rc = readErepair(rxBusTgt, rx_lanes);
        if (SUCCESS != l_rc)
        {
            PRDF_ERR( PRDF_FUNC "readErepair() failed: rxBusTgt=0x%08x",
                      getHuid(rxBusTgt) );
            break;
        }

        // Add newly failed lanes to capture data
        for (std::vector<uint8_t>::iterator lane = rx_lanes.begin();
             lane != rx_lanes.end(); ++lane)
        {
            PRDF_INF( PRDF_FUNC "New failed lane on RX HUID 0x%08x: %d",
                      getHuid(rxBusTgt), *lane);
            if (*lane < 64)
                l_newLaneMap0to63.Set(*lane);
            else if (*lane < 127)
                l_newLaneMap64to127.Set(*lane - 64);
            else
            {
                PRDF_ERR( PRDF_FUNC "Invalid lane number %d: rxBusTgt=0x%08x",
                          *lane, getHuid(rxBusTgt) );
                l_rc = FAIL; break;
            }
        }
        if ( SUCCESS != l_rc ) break;

        // Add failed lane capture data to errorlog
        i_sc.service_data->GetCaptureData().Add(i_chip->GetChipHandle(),
                                ( Util::hashString("ALL_FAILED_LANES_0TO63") ^
                                  i_chip->getSignatureOffset() ),
                                l_newLaneMap0to63);
        i_sc.service_data->GetCaptureData().Add(i_chip->GetChipHandle(),
                                ( Util::hashString("ALL_FAILED_LANES_64TO127") ^
                                  i_chip->getSignatureOffset() ),
                                l_newLaneMap64to127);

        if (!mfgMode()) // Don't read/write VPD in mfg mode
        {
            // Read Failed Lanes from VPD
            l_rc = getVpdFailedLanes(rxBusTgt, rx_vpdLanes, tx_vpdLanes);
            if (SUCCESS != l_rc)
            {
                PRDF_ERR( PRDF_FUNC "getVpdFailedLanes() failed: "
                          "rxBusTgt=0x%08x", getHuid(rxBusTgt) );
                break;
            }

            // Add VPD lanes to capture data
            for (std::vector<uint8_t>::iterator lane = rx_vpdLanes.begin();
                 lane != rx_vpdLanes.end(); ++lane)
            {
                if (*lane < 64)
                    l_vpdLaneMap0to63.Set(*lane);
                else if (*lane < 127)
                    l_vpdLaneMap64to127.Set(*lane - 64);
                else
                {
                    PRDF_ERR( PRDF_FUNC "Invalid VPD lane number %d: "
                              "rxBusTgt=0x%08x", *lane, getHuid(rxBusTgt) );
                    l_rc = FAIL; break;
                }
            }
            if ( SUCCESS != l_rc ) break;

            // Add failed lane capture data to errorlog
            i_sc.service_data->GetCaptureData().Add(i_chip->GetChipHandle(),
                                ( Util::hashString("VPD_FAILED_LANES_0TO63") ^
                                  i_chip->getSignatureOffset() ),
                                l_vpdLaneMap0to63);
            i_sc.service_data->GetCaptureData().Add(i_chip->GetChipHandle(),
                               ( Util::hashString("VPD_FAILED_LANES_64TO127") ^
                                 i_chip->getSignatureOffset() ),
                               l_vpdLaneMap64to127);

            if (i_spareDeployed)
            {
                // Call Erepair to update VPD
                l_rc = setVpdFailedLanes(rxBusTgt, txBusTgt,
                                         rx_lanes, thrExceeded);
                if (SUCCESS != l_rc)
                {
                    PRDF_ERR( PRDF_FUNC "setVpdFailedLanes() failed: "
                              "rxBusTgt=0x%08x txBusTgt=0x%08x",
                              getHuid(rxBusTgt), getHuid(txBusTgt) );
                    break;
                }
                if( thrExceeded )
                {
                    i_sc.service_data->SetErrorSig(
                                            PRDFSIG_ERepair_FWThrExceeded );
                }
            }
        }

        if (i_spareDeployed && !thrExceeded)
        {
            // Update lists of lanes from VPD
            rx_vpdLanes.clear(); tx_vpdLanes.clear();
            l_rc = getVpdFailedLanes(rxBusTgt, rx_vpdLanes, tx_vpdLanes);
            if (SUCCESS != l_rc)
            {
                PRDF_ERR( PRDF_FUNC "getVpdFailedLanes() before power down "
                          "failed: rxBusTgt=0x%08x", getHuid(rxBusTgt) );
                break;
            }

            // Power down all lanes that have been saved in VPD
            l_rc = powerDownLanes(rxBusTgt, rx_vpdLanes, tx_vpdLanes);
            if (SUCCESS != l_rc)
            {
                PRDF_ERR( PRDF_FUNC "powerDownLanes() failed: rxBusTgt=0x%08x",
                          getHuid(rxBusTgt) );
                break;
            }
        }
        else
        {
            // Make predictive
            i_sc.service_data->SetServiceCall();
        }
    } while (0);

    // Clear FIRs
    if (rxBusTgt)
    {
        l_rc |= erepairFirIsolation(rxBusTgt);
        l_rc |= clearIOFirs(rxBusTgt);
    }

    if ( i_spareDeployed )
    {
        l_rc |= cleanupSecondaryFirBits( i_chip, i_busType, i_busPos );
    }

    // This return code gets returned by the plugin code back to the rule code.
    // So, we do not want to give a return code that the rule code does not
    // understand. So far, there is no need return a special code, so always
    // return SUCCESS.
    if ( SUCCESS != l_rc )
    {
        PRDF_ERR( PRDF_FUNC "i_chip: 0x%08x i_busType:%d i_busPos:%d",
                  i_chip->GetId(), i_busType, i_busPos );

        i_sc.service_data->SetErrorSig( PRDFSIG_ERepair_ERROR );
        CalloutUtil::defaultError( i_sc );
    }

    return SUCCESS;

    #undef PRDF_FUNC
}
Ejemplo n.º 2
0
errlHndl_t main( ATTENTION_VALUE_TYPE i_attentionType,
                 const AttnList & i_attnList )
{
    PRDF_ENTER( "PRDF::main() Global attnType=%04X", i_attentionType );

    // will unlock when going out of scope
    PRDF_SYSTEM_SCOPELOCK;

    g_prd_errlHndl = NULL;

    uint32_t rc =  SUCCESS;
    // clears all the chips saved to stack during last analysis
    ServiceDataCollector::clearChipStack();

    if(( g_initialized == false)&&(NULL ==systemPtr))
    {
        g_prd_errlHndl = noLock_initialize();
        if(g_prd_errlHndl != NULL) rc = PRD_NOT_INITIALIZED;
    }

    ServiceDataCollector serviceData;
    STEP_CODE_DATA_STRUCT sdc;
    sdc.service_data = &serviceData;
    SYSTEM_DEBUG_CLASS sysdebug;

    sysdebug.Reinitialize(i_attnList); //Refresh sysdebug with latest Attn data

    ////////////////////////////////////////////////////////////////////////////
    // Normalize global attn type (ie 11,12,13,....) to (CHECKSTOP, RECOVERED,
    // SPECIAL..)
    ////////////////////////////////////////////////////////////////////////////

    if ( i_attentionType == INVALID_ATTENTION_TYPE ||
         i_attentionType >= END_ATTENTION_TYPE )
    {
        rc = PRD_INVALID_ATTENTION_TYPE;
        PRDF_ERR( "PrdMain: Invalid attention type! Global:%x",
                  i_attentionType );
        i_attentionType = RECOVERABLE; // This will prevent RAS service problems
    }

    // link to the right service Generator
    ServiceGeneratorClass & serviceGenerator =
        ServiceGeneratorClass::ThisServiceGenerator();

    // Initialize the SDC error log. Required for GenerateSrcPfa() call below.
    serviceGenerator.createInitialErrl( i_attentionType );

    // check for something wrong
    if ( g_initialized == false || rc != SUCCESS || systemPtr == NULL )
    {
        if(rc == SUCCESS)
        {
            rc = PRD_NOT_INITIALIZED;
        }
        PRDF_ERR("PrdMain: PRD failed. RC=%x",rc );
        // we are not going to do an analysis - so fill out the Service Data
        (serviceData.GetErrorSignature())->setSigId(rc);
        serviceData.SetCallout(SP_CODE);
        serviceData.SetThresholdMaskId(0); // Sets AT_THRESHOLD, DEGRADED,
                                           // SERVICE_CALL
    }
    else  // do the analysis
    {
        // flush Cache so that SCR reads access hardware
        RegDataCache::getCachedRegisters().flush();

        serviceData.SetAttentionType(i_attentionType);

        // capture time of day
        serviceGenerator.SetErrorTod( i_attentionType, serviceData );

        if(serviceGenerator.QueryLoggingBufferFull())
        {
            serviceData.SetFlooding();
        }

        int32_t analyzeRc = systemPtr->Analyze(sdc, i_attentionType);
        // flush Cache to free up the memory
        RegDataCache::getCachedRegisters().flush();
        ScanFacility      & l_scanFac = ScanFacility::Access();
        //delete all the wrapper register objects since these were created
        //just for plugin code
        l_scanFac.ResetPluginRegister();
        SystemSpecific::postAnalysisWorkarounds(sdc);
        if(analyzeRc != SUCCESS && g_prd_errlHndl == NULL)
        {
            // We have a bad RC, but no error log - Fill out SDC and have
            // service generator make one
            (serviceData.GetErrorSignature())->setErrCode(
                                                    (uint16_t)analyzeRc );
            serviceData.SetCallout(SP_CODE);
            serviceData.SetServiceCall();
            // We don't want to gard unless we have a good
            // return code
            serviceData.Gard(GardAction::NoGard);
        }
    }

    if(g_prd_errlHndl != NULL)
    {
        PRDF_INF("PRDTRACE: PrdMain: g_prd_errlHndl != NULL");
        PRDF_ADD_PROCEDURE_CALLOUT( g_prd_errlHndl, SRCI_PRIORITY_MED,
                                    EPUB_PRC_SP_CODE );

        // This forces any previous errls to be committed
        g_prd_errlHndl = NULL;

        // pw 597903 -- Don't GARD if we got a global error.
        serviceData.Gard(GardAction::NoGard);
    }

    g_prd_errlHndl = serviceGenerator.GenerateSrcPfa( i_attentionType,
                                                      serviceData );

    // Sleep for 20msec to let attention lines settle if we are at threshold.
    if ( (g_prd_errlHndl == NULL) && serviceData.IsAtThreshold() )
    {
        PlatServices::milliSleep( 0, 20 );
    }

    RasServices::SetTerminateOnCheckstop(true);

    PRDF_EXIT( "PRDF::main()" );

    return(g_prd_errlHndl.release());
}
Ejemplo n.º 3
0
errlHndl_t main( ATTENTION_VALUE_TYPE i_attentionType,
                 const AttnList & i_attnList )
{
    PRDF_ENTER( "PRDF::main() Global attnType=%04X", i_attentionType );

    // These have to be outside of system scope lock
    errlHndl_t retErrl = NULL;
    bool initiateHwudump = false;
    TARGETING::TargetHandle_t dumpTrgt = NULL;
    errlHndl_t dumpErrl = NULL;
    uint32_t dumpErrlActions = 0;

    { // system scope lock starts ------------------------------------------

    // will unlock when going out of scope
    PRDF_SYSTEM_SCOPELOCK;

    g_prd_errlHndl = NULL;

    uint32_t rc =  SUCCESS;
    // clears all the chips saved to stack during last analysis
    ServiceDataCollector::clearChipStack();

    if(( g_initialized == false)&&(NULL ==systemPtr))
    {
        g_prd_errlHndl = noLock_initialize();
        if(g_prd_errlHndl != NULL) rc = PRD_NOT_INITIALIZED;
    }

    ServiceDataCollector serviceData;
    STEP_CODE_DATA_STRUCT sdc;
    sdc.service_data = &serviceData;
    SYSTEM_DEBUG_CLASS sysdebug;

    sysdebug.Reinitialize(i_attnList); //Refresh sysdebug with latest Attn data

    ////////////////////////////////////////////////////////////////////////////
    // Normalize global attn type (ie 11,12,13,....) to (CHECKSTOP, RECOVERED,
    // SPECIAL..)
    ////////////////////////////////////////////////////////////////////////////

    if ( i_attentionType == INVALID_ATTENTION_TYPE ||
         i_attentionType >= END_ATTENTION_TYPE )
    {
        rc = PRD_INVALID_ATTENTION_TYPE;
        PRDF_ERR( "PrdMain: Invalid attention type! Global:%x",
                  i_attentionType );
        i_attentionType = RECOVERABLE; // This will prevent RAS service problems
    }

    // link to the right service Generator
    ServiceGeneratorClass & serviceGenerator =
        ServiceGeneratorClass::ThisServiceGenerator();

    // Initialize the SDC error log. Required for GenerateSrcPfa() call below.
    serviceGenerator.createInitialErrl( i_attentionType );

    // check for something wrong
    if ( g_initialized == false || rc != SUCCESS || systemPtr == NULL )
    {
        if(rc == SUCCESS)
        {
            rc = PRD_NOT_INITIALIZED;
        }
        PRDF_ERR("PrdMain: PRD failed. RC=%x",rc );
        // we are not going to do an analysis - so fill out the Service Data
        (serviceData.GetErrorSignature())->setSigId(rc);
        serviceData.SetCallout(SP_CODE);
        serviceData.SetCallout( NextLevelSupport_ENUM, MRU_LOW );
        serviceData.SetThresholdMaskId(0); // Sets AT_THRESHOLD, DEGRADED,
                                           // SERVICE_CALL
    }
    else  // do the analysis
    {
        // flush Cache so that SCR reads access hardware
        RegDataCache::getCachedRegisters().flush();

        serviceData.setPrimaryAttnType(i_attentionType);

        // Set the time in which PRD handled the error.
        Timer timeOfError;
        PlatServices::getCurrentTime( timeOfError );
        serviceData.SetTOE( timeOfError );

        ServiceDataCollector l_tempSdc = serviceData;
        l_tempSdc.setPrimaryPass();
        sdc.service_data = &l_tempSdc;

        int32_t analyzeRc = systemPtr->Analyze( sdc, i_attentionType );

        if( PRD_SCAN_COMM_REGISTER_ZERO == analyzeRc )
        {
            // So, the first pass has failed. Hence, there are no primary
            // bits set. We must start second pass to see if there are any
            //secondary bits set.
            sdc.service_data = &serviceData;

            #if !defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME)
            ForceSyncAnalysis( l_tempSdc ); // save SDC till end of primary pass
            #endif

            // starting the second pass
            PRDF_INF( "PRDF::main() No bits found set in first pass,"
                      " starting second pass" );
            sysdebug.initAttnPendingtatus( ); //for the second  pass

            if( l_tempSdc.isSecondaryErrFound() )
            {
                sdc.service_data->setSecondaryErrFlag();
            }

            analyzeRc = systemPtr->Analyze( sdc, i_attentionType );

            // merging capture data of primary pass with capture data of
            // secondary pass for better FFDC.
            serviceData.GetCaptureData().mergeData(
                                    l_tempSdc.GetCaptureData());

            #if !defined(__HOSTBOOT_MODULE) && !defined(__HOSTBOOT_RUNTIME)
            // save SDC till end of secondary pass
            ForceSyncAnalysis( serviceData );
            #endif
        }
        else
        {
            serviceData = l_tempSdc;
            sdc.service_data = &serviceData;
        }

        // flush Cache to free up the memory
        RegDataCache::getCachedRegisters().flush();
        ScanFacility      & l_scanFac = ScanFacility::Access();
        //delete all the wrapper register objects since these were created
        //just for plugin code
        l_scanFac.ResetPluginRegister();
        if(analyzeRc != SUCCESS && g_prd_errlHndl == NULL)
        {
            (serviceData.GetErrorSignature())->setErrCode(
                                                    (uint16_t)analyzeRc );
            serviceData.SetCallout(SP_CODE);
            serviceData.SetCallout( NextLevelSupport_ENUM, MRU_LOW );
            serviceData.SetServiceCall();
            // We don't want to gard unless we have a good
            // return code
            serviceData.Gard(GardAction::NoGard);
        }
    }

    if(g_prd_errlHndl != NULL)
    {
        PRDF_INF("PRDTRACE: PrdMain: g_prd_errlHndl != NULL");
        PRDF_ADD_PROCEDURE_CALLOUT( g_prd_errlHndl, SRCI_PRIORITY_MED,
                                    EPUB_PRC_SP_CODE );
        // This is a precautionary step. There is a possibilty that if
        // severity for g_prd_errlHndl is Predictve and there is only
        // EPUB_PRC_SP_CODE callout than it will be changed to tracing event.
        // So adding EPUB_PRC_LVL_SUPP to avoid this.
        PRDF_ADD_PROCEDURE_CALLOUT( g_prd_errlHndl, SRCI_PRIORITY_LOW,
                                    EPUB_PRC_LVL_SUPP );

        // This forces any previous errls to be committed
        g_prd_errlHndl = NULL;

        // pw 597903 -- Don't GARD if we got a global error.
        serviceData.Gard(GardAction::NoGard);
    }

    g_prd_errlHndl = serviceGenerator.GenerateSrcPfa( i_attentionType,
                                                      serviceData,
                                                      initiateHwudump,
                                                      dumpTrgt,
                                                      dumpErrl,
                                                      dumpErrlActions);

    // Sleep for 20msec to let attention lines settle if we are at threshold.
    if ( (g_prd_errlHndl == NULL) && serviceData.IsAtThreshold() )
    {
        PlatServices::milliSleep( 0, 20 );
    }

    retErrl = g_prd_errlHndl.release();

    } // system scope lock ends ------------------------------------------


    if ( true == initiateHwudump )
    {
        PlatServices::initiateUnitDump( dumpTrgt, dumpErrl, dumpErrlActions );
    }

    PRDF_EXIT( "PRDF::main()" );

    return retErrl;
}