Beispiel #1
0
void task_core_data( task_t * i_task )
{

    errlHndl_t  l_err = NULL;       //Error handler
    tracDesc_t  l_trace = NULL;     //Temporary trace descriptor
    int         rc = 0;     //return code
    bulk_core_data_task_t * l_bulk_core_data_ptr = (bulk_core_data_task_t *)i_task->data_ptr;
    GpeGetCoreDataParms * l_parms = (GpeGetCoreDataParms *)(l_bulk_core_data_ptr->gpe_req.parameter);
    gpe_bulk_core_data_t  * l_temp = NULL;

    do
    {
        //First, check to see if the previous GPE request is still running.
        //A request is considered idle if it is not attached to any of the
        //asynchronous request queues
        if( !(async_request_is_idle(&l_bulk_core_data_ptr->gpe_req.request)) )
        {
            //This should not happen unless there's a problem
            //Trace 1 time
            if( !G_queue_not_idle_traced )
            {
                TRAC_ERR("Core data GPE is still running \n");
                G_queue_not_idle_traced = TRUE;
            }
            break;
        }

        //Need to complete collecting data for all assigned cores from previous interval
        //and tick 0 is the current tick before collect data again.
        if( (l_bulk_core_data_ptr->current_core == l_bulk_core_data_ptr->end_core)
            &&
            ((CURRENT_TICK & (MAX_NUM_TICKS - 1)) != 0) )
        {
            PROC_DBG("Not collect data. Need to wait for tick.\n");
            break;
        }

        //Check to see if the previous GPE request has completed successfully.
        //A request is not considered complete until both the engine job
        //has finished without error and any callback has run to completion.

        if( async_request_completed(&l_bulk_core_data_ptr->gpe_req.request)
            &&
            CORE_PRESENT(l_bulk_core_data_ptr->current_core) )
        {
            //If the previous GPE request succeeded then swap core_data_ptr
            //with the global one. The gpe routine will write new data into
            //a buffer that is not being accessed by the RTLoop code.

            PROC_DBG( "Swap core_data_ptr [%x] with the global one\n",
                     l_bulk_core_data_ptr->current_core );

            //debug only
#ifdef PROC_DEBUG
            print_core_status(l_bulk_core_data_ptr->current_core);
            print_core_data_sensors(l_bulk_core_data_ptr->current_core);
#endif

            l_temp = l_bulk_core_data_ptr->core_data_ptr;
            l_bulk_core_data_ptr->core_data_ptr =
                    G_core_data_ptrs[l_bulk_core_data_ptr->current_core];
            G_core_data_ptrs[l_bulk_core_data_ptr->current_core] = l_temp;

            //Core data has been collected so set the bit in global mask.
            //AMEC code will know which cores to update sensors for. AMEC is
            //responsible for clearing the bit later on.
            G_updated_core_mask |= CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core);

            // Presumptively clear the empath error mask
            G_empath_error_core_mask &=
                    ~(CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core));

            // The gpe_data collection code has to handle the workaround for
            // HW280375.  Two new flags have been added to the OHA_RO_STATUS_REG
            // image to indicate whether the EMPATH collection failed, and
            // whether it was due to an "expected" error that we can ignore
            // (we can ignore the data as well), or an "unexpected" error that
            // we will create an informational log one time.
            //
            // The "expected" errors are very rare in practice, in fact we may
            // never even see them unless running a specific type of workload.
            // If you want to test the handling of expected errors compile the
            // GPE code with -DINJECT_HW280375_ERRORS which will inject an error
            // approximately every 1024 samples
            //
            // To determine if the expected error has occurred inspect the
            // CoreDataOha element of the CoreData structure written by the GPE
            // core data job.  The OHA element contains the oha_ro_status_reg.
            // Inside the OHA status register is a 16 bit reserved field.
            // gpe_data.h defines two masks that can be applied against the
            // reserved field to check for these errors:
            // CORE_DATA_EXPECTED_EMPATH_ERROR
            // CORE_DATA_UNEXPECTED_EMPATH_ERROR
            // Also, a 4-bit PCB parity + error code is saved at bit position:
            // CORE_DATA_EMPATH_ERROR_LOCATION, formally the length is
            // specified by: CORE_DATA_EMPATH_ERROR_BITS
            gpe_bulk_core_data_t *l_core_data =
                    G_core_data_ptrs[l_bulk_core_data_ptr->current_core];

            // We will trace the errors, but only a certain number of
            // times, we will only log the unexpected error once.
#define OCC_EMPATH_ERROR_THRESH 10
            static uint32_t L_expected_emp_err_cnt = 0;
            static uint32_t L_unexpected_emp_err_cnt = 0;

            // Check the reserved field for the expected or the unexpected error flag
            if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_EXPECTED_EMPATH_ERROR)
                ||
                (l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_UNEXPECTED_EMPATH_ERROR))
            {
                // Indicate empath error on current core
                G_empath_error_core_mask |=
                        CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core);

                // Save the high and low order words of the OHA status reg
                uint32_t l_oha_reg_high = l_core_data->oha.oha_ro_status_reg.words.high_order;
                uint32_t l_oha_reg_low = l_core_data->oha.oha_ro_status_reg.words.low_order;

                // Handle each error case
                if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_EXPECTED_EMPATH_ERROR)
                    &&
                    (L_expected_emp_err_cnt < OCC_EMPATH_ERROR_THRESH))
                {
                    L_expected_emp_err_cnt++;
                    TRAC_IMP("Expected empath collection error occurred %d time(s)! Core = %d",
                             L_expected_emp_err_cnt,
                             l_bulk_core_data_ptr->current_core);
                    TRAC_IMP("OHA status register: 0x%4.4x%4.4x",
                             l_oha_reg_high, l_oha_reg_low);
                }

                if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_UNEXPECTED_EMPATH_ERROR)
                    &&
                    (L_unexpected_emp_err_cnt < OCC_EMPATH_ERROR_THRESH))
                {
                    L_unexpected_emp_err_cnt++;
                    TRAC_ERR("Unexpected empath collection error occurred %d time(s)! Core = %d",
                             L_unexpected_emp_err_cnt,
                             l_bulk_core_data_ptr->current_core);
                    TRAC_ERR("OHA status register: 0x%4.4x%4.4x",
                             l_oha_reg_high, l_oha_reg_low);

                    // Create and commit an informational error the first
                    // time this occurs.
                    if (L_unexpected_emp_err_cnt == 1)
                    {
                        TRAC_IMP("Logging unexpected empath collection error 1 time only.");
                        /*
                        * @errortype
                        * @moduleid    PROC_TASK_CORE_DATA_MOD
                        * @reasoncode  INTERNAL_HW_FAILURE
                        * @userdata1   OHA status reg high
                        * @userdata2   OHA status reg low
                        * @userdata4   ERC_PROC_CORE_DATA_EMPATH_ERROR
                        * @devdesc     An unexpected error occurred while
                        *              collecting core empath data.
                        */
                        l_err = createErrl(
                                PROC_TASK_CORE_DATA_MOD, //modId
                                INTERNAL_HW_FAILURE,     //reason code
                                ERC_PROC_CORE_DATA_EMPATH_ERROR, //Extended reason code
                                ERRL_SEV_INFORMATIONAL,  //Severity
                                NULL,                    //Trace
                                DEFAULT_TRACE_SIZE,      //Trace Size
                                l_oha_reg_high,          //userdata1
                                l_oha_reg_low);          //userdata2

                        commitErrl(&l_err);
                    }
                }
            }
        }
Beispiel #2
0
void task_centaur_control( task_t * i_task )
{
    errlHndl_t            l_err     = NULL;    // Error handler
    int                   rc        = 0;       // Return code
    uint32_t              l_cent;
    amec_centaur_t        *l_cent_ptr = NULL;
    static uint8_t        L_scom_timeout[MAX_NUM_CENTAURS] = {0}; //track # of consecutive failures
    static bool           L_gpe_scheduled = FALSE;
    static uint8_t        L_gpe_fail_logged = 0;
    static bool           L_gpe_idle_traced = FALSE;
    static bool           L_gpe_had_1_tick = FALSE;

    // Pointer to the task data structure
    centaur_control_task_t * l_centControlTask =
            (centaur_control_task_t *) i_task->data_ptr;


    // Pointer to parameter field for GPE request
    GpeScomParms * l_parms =
          (GpeScomParms *)(l_centControlTask->gpe_req.parameter);

    do
    {
        l_cent = l_centControlTask->curCentaur;
        l_cent_ptr = &g_amec->proc[0].memctl[l_cent].centaur;

        //First, check to see if the previous GPE request is still running.
        //A request is considered idle if it is not attached to any of the
        //asynchronous request queues
        if( !(async_request_is_idle(&l_centControlTask->gpe_req.request)) )
        {
            L_scom_timeout[l_cent]++;
            //This can happen due to variability in when the task runs
            if(!L_gpe_idle_traced && L_gpe_had_1_tick)
            {
                TRAC_INFO("task_centaur_control: GPE is still running. cent[%d]", l_cent);
                l_centControlTask->traceThresholdFlags |= CENTAUR_CONTROL_GPE_STILL_RUNNING;
                L_gpe_idle_traced = TRUE;
            }
            L_gpe_had_1_tick = TRUE;
            break;
        }
        else
        {
            //Request is idle
            L_gpe_had_1_tick = FALSE;
            if(L_gpe_idle_traced)
            {
                TRAC_INFO("task_centaur_control: GPE completed. cent[%d]", l_cent);
                L_gpe_idle_traced = FALSE;
            }
        }

        //check scom status
        if(L_gpe_scheduled)
        {
            if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc)
            {
                if(!(L_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent)))
                {
                    // Check if the centaur has a channel checkstop. If it does,
                    // then do not log any errors. We also don't want to throttle
                    // a centaur that is in this condition.
                    if(!(cent_chan_checkstop(l_cent)))
                    {
                        L_gpe_fail_logged |= CENTAUR0_PRESENT_MASK >> l_cent;
                        TRAC_ERR("task_centaur_control: gpe_scom_centaur failed. l_cent=%d rc=%x, index=0x%08x", l_cent, l_parms->rc, l_parms->errorIndex);

                        /* @
                         * @errortype
                         * @moduleid    CENT_TASK_CONTROL_MOD
                         * @reasoncode  CENT_SCOM_ERROR
                         * @userdata1   rc - Return code of scom operation
                         * @userdata2   index of scom operation that failed
                         * @userdata4   OCC_NO_EXTENDED_RC
                         * @devdesc     OCC access to centaur failed
                         */
                        l_err = createErrl(
                                CENT_TASK_CONTROL_MOD,                  // modId
                                CENT_SCOM_ERROR,                        // reasoncode
                                OCC_NO_EXTENDED_RC,                     // Extended reason code
                                ERRL_SEV_PREDICTIVE,                    // Severity
                                NULL,                                   // Trace Buf
                                DEFAULT_TRACE_SIZE,                     // Trace Size
                                l_parms->rc,                            // userdata1
                                l_parms->errorIndex                     // userdata2
                                );

                        addUsrDtlsToErrl(l_err,                                  //io_err
                                (uint8_t *) &(l_centControlTask->gpe_req.ffdc),  //i_dataPtr,
                                sizeof(PoreFfdc),                                //i_size
                                ERRL_USR_DTL_STRUCT_VERSION_1,                   //version
                                ERRL_USR_DTL_BINARY_DATA);                       //type

                        //callout the centaur
                        addCalloutToErrl(l_err,
                                         ERRL_CALLOUT_TYPE_HUID,
                                         G_sysConfigData.centaur_huids[l_cent],
                                         ERRL_CALLOUT_PRIORITY_MED);

                        //callout the processor
                        addCalloutToErrl(l_err,
                                         ERRL_CALLOUT_TYPE_HUID,
                                         G_sysConfigData.proc_huid,
                                         ERRL_CALLOUT_PRIORITY_MED);

                        commitErrl(&l_err);
                    }
                }//if(l_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent))

                //Request failed. Keep count of failures and request a reset if we reach a
                //max retry count
                L_scom_timeout[l_cent]++;
                if(L_scom_timeout[l_cent] == CENTAUR_CONTROL_SCOM_TIMEOUT)
                {
                    break;
                }

            }//if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc)
            else
            {
                //request completed successfully.  reset the timeout.
                L_scom_timeout[l_cent] = 0;
            }
        }//if(L_gpe_scheduled)
Beispiel #3
0
int
_centaur_configuration_create(int i_bar, int i_slave, int i_setup)
{
    CentaurConfiguration config;
    int i, designatedSync, diffInit;
    int64_t rc;                 /* Must be copied to global struct. */
    mcfgpr_t mcfgpr;
    mcifir_t mcifir;
    mcsmode0_t mcsmode0;
    pba_slvctln_t slvctl;
    uint64_t diffMask, addrAccum, bar, mask, base;
    PoreFlex request;

    // Start by clearing the local structure and setting the error flag.
    memset(&config, 0, sizeof(config));
    config.configRc = CENTAUR_NOT_CONFIGURED;

    designatedSync = -1;

    do {
        // Basic consistency checks

        if ((i_bar < 0) || (i_bar >= PBA_BARS) ||
            (i_slave < 0) || (i_slave >= PBA_SLAVES)) {
            
            rc = CENTAUR_INVALID_ARGUMENT;
            break;
        }

        
        // Create the setups for the GPE procedures. The 'dataParms' are the
        // setup for accessing the Centaur sensor cache.  The 'scomParms' are
        // the setup for accessing Centaur SCOMs. 

        rc = gpe_pba_parms_create(&(config.dataParms),
                                  PBA_SLAVE_PORE_GPE,
                                  PBA_WRITE_TTYPE_CI_PR_W,
                                  PBA_WRITE_TTYPE_DC,
                                  PBA_READ_TTYPE_CL_RD_NC);
        if (rc) {
            rc = CENTAUR_DATA_SETUP_ERROR;
            break;
        }

        rc = gpe_pba_parms_create(&(config.scomParms),
                                  PBA_SLAVE_PORE_GPE,
                                  PBA_WRITE_TTYPE_CI_PR_W,
                                  PBA_WRITE_TTYPE_DC,
                                  PBA_READ_TTYPE_CI_PR_RD);
        if (rc) {
            rc = CENTAUR_SCOM_SETUP_ERROR;
            break;
        }


        // Go into each MCS on the chip, and for all enabled MCS get a couple
        // of SCOMs and check configuration items for correctness. If any of
        // the Centaur are configured, exactly one of the MCS must be
        // designated to receive the SYNC commands.

        // Note that the code uniformly treats SCOM failures of the MCFGPR
        // registers as an unconfigured Centaur. This works both for Murano,
        // which only defines the final 4 MCS, as well as for our VBU models
        // where some of the "valid" MCS are not in the simulation models.

        for (i = 0; i < PGP_NCENTAUR; i++) {

            // SW273928: New function added for FW820, when centaur has channel
            // checkstop, we consider centaur is not usable so treat it as 
            // deconfigured. Note that the current implementation assumes when 
            // centaur is dead, its mcs is also dead, which is wrong. However,
            // it only concerns when MCS happens to be the SYNC master because 
            // the gpe procedure only tries to talk to centaurs regardless what 
            // MCS status it knows about. In this particular case,
            // the procedure will turn on SYNC on a different MCS with
            // valid centaur. According to Eric Retter, it would be ok for 
            // HW to have more MCS turned on as SYNC master as long as FW
            // only send SYNC command to one of them. 

            rc = _getscom(MCS_ADDRESS(MCIFIR, i), &(mcifir.value),
                          SCOM_TIMEOUT);
            if (rc) {
                rc = 0;
                config.baseAddress[i] = 0;
                continue;
            }

            if (mcifir.fields.channel_fail_signal_active) continue;

            rc = _getscom(MCS_ADDRESS(MCFGPR, i), &(mcfgpr.value),
                          SCOM_TIMEOUT);
            if (rc) {
                rc = 0;
                config.baseAddress[i] = 0;
                continue;
            }

            if (!mcfgpr.fields.mcfgprq_valid) continue;

            rc = _getscom(MCS_ADDRESS(MCSMODE0, i), &(mcsmode0.value),
                          SCOM_TIMEOUT);
            if (rc) {
                PRINTD("Unexpected rc = 0x%08x SCOMing MCSMODE0(%d)\n",
                       (uint32_t)rc, i);
                rc = CENTAUR_MCSMODE0_SCOM_FAILURE;
                break;
            }


            // We require that the MCFGRP_19_IS_HO_BIT be set in the mode
            // register.  We do not support the option of this bit not being
            // set, and all of our procedures will set bit 19 of the PowerBus
            // address to indicate that OCC is making the access.

            if (!mcsmode0.fields.mcfgrp_19_is_ho_bit) {

                PRINTD("MCSMODE0(%d).mcfgrp_19_is_ho_bit == 0\n", i);
                rc = CENTAUR_MCSMODE0_19_FAILURE;
                break;
            }
                

            // The 14-bit base-address is moved to begin at bit 14 in the
            // 64-bit PowerBus address. The low-order bit of this address (bit
            // 19 mentioned above which is bit 27 as an address bit) must be 0
            // - otherwise there is confusion over who's controlling this
            // bit.

            config.baseAddress[i] = 
                ((uint64_t)(mcfgpr.fields.mcfgprq_base_address)) << 
                (64 - 14 - 14);

            if (config.baseAddress[i] & 0x0000001000000000ull) {

                PRINTD("Centaur base address %d has bit 27 set\n", i);
                rc = CENTAUR_ADDRESS_27_FAILURE;
                break;
            }


            // If this MCS is configured to be the designated SYNC unit, it
            // must be the only one. 

            if (mcsmode0.fields.enable_centaur_sync) {

                if (designatedSync > 0) {

                    PRINTD("Both MCS %d and %d are designated "
                           "for Centaur Sync\n",
                           designatedSync, i);
                    rc = CENTAUR_MULTIPLE_DESIGNATED_SYNC;
                    break;

                } else {

                    designatedSync = i;
                }
            }


            // Add the Centaur to the configuration

            config.config |= (CHIP_CONFIG_MCS(i) | CHIP_CONFIG_CENTAUR(i));
        }

        if (rc) break;


        // If Centaur are configured, make sure at least one of the MCS will
        // handle the SYNC. If so, convert its base address into an address
        // for issuing SYNC commands by setting bits 27 (OCC) 28 and 29
        // (Sync), then insert this address into the extended address field of
        // a PBA slave control register image. gsc_scom_centaur() then merges
        // this extended address into the PBA slave control register (which
        // has been set up for Centaur SCOM) to do the SYNC.

        // In the override mode (i_setup > 1) we tag the first valid MCS
        // to recieve the sync if the firmware has not set it up correctly.

        if (config.config) {

            if (designatedSync < 0) {

                if (i_setup <= 1) {

                    PRINTD("No MCS is designated for Centaur SYNC\n");
                    rc = CENTAUR_NO_DESIGNATED_SYNC;
                    break;

                } else {

                    designatedSync = 
                        cntlz32(left_justify_mcs_config(config.config));

                    rc = _getscom(MCS_ADDRESS(MCSMODE0, designatedSync), 
                                  &(mcsmode0.value),
                                  SCOM_TIMEOUT);
                    if (rc) {
                        PRINTD("Unexpected rc = 0x%08x SCOMing MCSMODE0(%d)\n",
                               (uint32_t)rc, designatedSync);
                        rc = CENTAUR_MCSMODE0_SCOM_FAILURE;
                        break;
                    }

                    mcsmode0.fields.enable_centaur_sync = 1;

                    rc = _putscom(MCS_ADDRESS(MCSMODE0, designatedSync), 
                                  mcsmode0.value,
                                  SCOM_TIMEOUT);
                    if (rc) {
                        PRINTD("Unexpected rc = 0x%08x SCOMing MCSMODE0(%d)\n",
                               (uint32_t)rc, designatedSync);
                        rc = CENTAUR_MCSMODE0_SCOM_FAILURE;
                        break;
                    }
                }
            }

            base = config.baseAddress[designatedSync] | 0x0000001c00000000ull;

            slvctl.value = 0;
            slvctl.fields.extaddr = (base & 0x000001fff8000000ull) >> 27;

            config.syncSlaveControl = slvctl.value;
        }

        
        // At this point we have one or more enabled MCS and they pass the
        // initial configuration sniff test. We can now implement the option
        // to configure the PBA BAR and BAR MASK correctly to allow access to
        // these Centaur. We do this by computing the minimum BAR mask that
        // covers all of the Centaur base addresses. This is done by
        // accumulating a difference mask of the base addresses and finding
        // the first set bit in the mask.
        //
        // Note that we do the configuration here on demand, but always do the
        // correctness checking as the next step.

        if (i_setup && (config.config != 0)) {

            diffInit = 0;
            diffMask = 0;       /* GCC happiness */
            addrAccum = 0;      /* GCC happiness */
            
            for (i = 0; i < PGP_NCENTAUR; i++) {

                if (config.baseAddress[i] != 0) {

                    if (!diffInit) {

                        diffInit = 1;
                        diffMask = 0;
                        addrAccum = config.baseAddress[i];

                    } else {

                        diffMask |= 
                            (config.baseAddress[i] ^ addrAccum);
                        addrAccum |= config.baseAddress[i];
                    }

                    if (0) {

                        // Debug

                        printk("i:%d baseAddress: 0x%016llx "
                               "diffMask: 0x%016llx, addrAccum: 0x%016llx\n",
                               i, config.baseAddress[i], diffMask, addrAccum);
                    }
                }
            }

            // The mask must cover all differences - and must also have at
            // least bit 27 set. The mask register contains only the mask. The
            // BAR is set to the accumulated address outside of the mask. The
            // BAR also contains a scope field which defaults to 0 (Nodal
            // Scope) for Centaur inband access.

            diffMask |= 0x0000001000000000ull;            
            mask = 
                ((1ull << (64 - cntlz64(diffMask))) - 1) &
                PBA_BARMSKN_MASK_MASK;

            rc = _putscom(PBA_BARMSKN(i_bar), mask, SCOM_TIMEOUT);
            if (rc) {
                PRINTD("Unexpected rc = 0x%08x SCOMing PBA_BARMSKN(%d)\n",
                       (uint32_t)rc, i_bar);
                rc = CENTAUR_BARMSKN_PUTSCOM_FAILURE;
                break;
            }

            rc = _putscom(PBA_BARN(i_bar), addrAccum & ~mask, SCOM_TIMEOUT);
            if (rc) {
                PRINTD("Unexpected rc = 0x%08x SCOMing PBA_BARN(%d)\n",
                       (uint32_t)rc, i_bar);
                rc = CENTAUR_BARN_PUTSCOM_FAILURE;
                break;
            }
        }


        // Do an independent check that every Centaur base address
        // can be generated by the combination of the current BAR and
        // BAR Mask, along with the initial requirement that the mask must
        // include at least bits 27:43.

        if (config.config != 0) {

            rc = _getscom(PBA_BARN(i_bar), &bar, SCOM_TIMEOUT);
            if (rc) {
                PRINTD("Unexpected rc = 0x%08x SCOMing PBA_BARN(%d)\n",
                       (uint32_t)rc, i_bar);
                rc = CENTAUR_BARN_GETSCOM_FAILURE;
                break;
            }

            rc = _getscom(PBA_BARMSKN(i_bar), &mask, SCOM_TIMEOUT);

            if (rc) {
                PRINTD("Unexpected rc = 0x%08x SCOMing PBA_BARMSKN(%d)\n",
                       (uint32_t)rc, i_bar);
                rc = CENTAUR_BARMSKN_GETSCOM_FAILURE;
                break;
            }

            bar = bar & PBA_BARN_ADDR_MASK;
            mask = mask & PBA_BARMSKN_MASK_MASK;

            if ((mask & 0x0000001ffff00000ull) != 0x0000001ffff00000ull) {

                PRINTD("PBA BAR mask (%d) does not cover bits 27:43\n", i_bar);
                rc = CENTAUR_MASK_ERROR;
                break;
            }

            for (i = 0; i < PGP_NCENTAUR; i++) {

                if (config.baseAddress[i] != 0) {

                    if ((config.baseAddress[i] & ~mask) != 
                        (bar & ~mask)) {

                        PRINTD("BAR/Mask (%d) error for MCS/Centaur %d\n"
                               "    base = 0x%016llx\n"
                               "    bar  = 0x%016llx\n"
                               "    mask = 0x%016llx\n",

                               i_bar, i, config.baseAddress[i], bar, mask);
                        rc = CENTAUR_BAR_MASK_ERROR;
                        break;
                    }
                }
            }

            if (rc) break;
        }


        // At this point the structure is initialized well-enough that it can
        // be used by gpe_scom_centaur(). We run gpe_scom_centaur() to collect
        // the CFAM ids of the chips.  Prior to this we copy our local copy
        // into the global read-only data structure. (Note that GPE can DMA
        // under the OCC TLB memory protection.) In order for
        // gpe_scom_centaur() to run the global configuration must be valid
        // (configRc == 0) - so we provisionally mark it valid (and will
        // invalidate it later if errors occur here).

        // Note however that if no Centaur are present then we're already
        // done.

        // It's assumed that this procedure is being run before threads have
        // started, therefore we must poll for completion of the GPE program.
        // Assuming no contention for GPE1 this procedure should take a few
        // microseconds at most to complete.

        if (0) {

            // Debug for Simics - only enable MCS 5

            config.baseAddress[0] =
                config.baseAddress[1] =
                config.baseAddress[2] =
                config.baseAddress[3] =
                config.baseAddress[4] =
                config.baseAddress[6] =
                config.baseAddress[7] = 0;
        }


        config.configRc = 0;
        memcpy_real(&G_centaurConfiguration, &config, sizeof(config));

        if (config.config == 0) break;

        S_scomList.scom = CENTAUR_DEVICE_ID;
        S_scomList.commandType = GPE_SCOM_READ_VECTOR;
        S_scomList.pData = G_centaurConfiguration.deviceId;

        S_parms.scomList = CAST_POINTER(uint64_t, &S_scomList);
        S_parms.entries = 1;
        S_parms.options = 0;
        
        pore_flex_create(&request,
                         &G_pore_gpe1_queue,
                         gpe_scom_centaur,
                         (uint32_t)(&S_parms),
                         SSX_MILLISECONDS(10), /* Timeout */
                         0, 0, 0);

        rc = pore_flex_schedule(&request);

        if (rc) break;

        while (!async_request_is_idle((AsyncRequest*)(&request)));

        if (!async_request_completed((AsyncRequest*)(&request)) ||
            (S_parms.rc != 0)) {

            PRINTD("gpe_scom_centaur() for CENTAUR_DEVICE_ID failed:\n"
                   "    Async state                   = 0x%02x\n"
                   "    gpe_scom_centaur() rc         = %u\n"
                   "    gpe_scom_centaur() errorIndex = %d\n",
                   ((AsyncRequest*)(&request))->state,
                   S_parms.rc, S_parms.errorIndex);
                   
            rc = CENTAUR_READ_TPC_ID_FAILURE;
        }

        if (0) {

            // Debug

            slvctl.value = G_gsc_lastSlaveControl;
            
            PRINTD("centaur_configuration_create:Debug\n"
                   "    Last SCOM (PowerBus) address  = 0x%016llx\n"
                   "    Last Slave Control            = 0x%016llx\n"
                   "    Extended Address (positioned) = 0x%016llx\n"
                   "    Last OCI Address              = 0x%016llx\n",
                   G_gsc_lastScomAddress,
                   G_gsc_lastSlaveControl,
                   (unsigned long long)(slvctl.fields.extaddr) <<
                   (64 - 23 - 14),
                   G_gsc_lastOciAddress);
        }

    } while (0);

    // Copy the final RC into the global structure and done.

    memcpy_real(&(G_centaurConfiguration.configRc), &rc, sizeof(rc));

    return rc;
}
Beispiel #4
0
// Schedule a GPE request for the specified DIMM state
//
// The request is only handed to gpe_request_schedule() when
// async_request_is_idle() reports the previous request idle and i_state is
// one of the DIMM_STATE_* values listed below.  An unlisted state, or a
// non-zero rc from gpe_request_schedule(), creates an ERRL_SEV_PREDICTIVE
// error log and passes it to REQUEST_RESET.
//
// Returns true only when gpe_request_schedule() returned 0.
bool schedule_dimm_req(uint8_t i_state)
{
    DIMM_DBG("dimm_sm called with state 0x%02X (tick=%d)", i_state, DIMM_TICK);

    // Nothing can be scheduled while the previous request is not idle
    if (!async_request_is_idle(&G_dimm_sm_request.request))
    {
        INTR_TRAC_ERR("dimm_sm: request is not idle.");
        return false;
    }

    // Whitelist of states that may be scheduled
    bool l_known_state = false;
    switch (i_state)
    {
        // Init
        case DIMM_STATE_INIT:
        // Read DIMM temp
        case DIMM_STATE_WRITE_MODE:
        case DIMM_STATE_WRITE_ADDR:
        case DIMM_STATE_INITIATE_READ:
        case DIMM_STATE_READ_TEMP:
        // I2C reset
        case DIMM_STATE_RESET_MASTER:
        case DIMM_STATE_RESET_SLAVE_P0:
        case DIMM_STATE_RESET_SLAVE_P0_COMPLETE:
        case DIMM_STATE_RESET_SLAVE_P1:
        case DIMM_STATE_RESET_SLAVE_P1_COMPLETE:
            l_known_state = true;
            break;

        default:
            break;
    }

    if (!l_known_state)
    {
        INTR_TRAC_ERR("dimm_sm: Invalid state (0x%02X)", i_state);
        /*
         * @errortype
         * @moduleid    DIMM_MID_DIMM_SM
         * @reasoncode  DIMM_INVALID_STATE
         * @userdata1   DIMM state
         * @userdata2   0
         * @devdesc     Invalid DIMM I2C state requested
         */
        errlHndl_t l_state_err = createErrl(DIMM_MID_DIMM_SM,
                                            DIMM_INVALID_STATE,
                                            OCC_NO_EXTENDED_RC,
                                            ERRL_SEV_PREDICTIVE,
                                            NULL,
                                            DEFAULT_TRACE_SIZE,
                                            i_state,
                                            0);
        // Request reset since this should never happen.
        REQUEST_RESET(l_state_err);
        return false;
    }

    // Clear errors and init common arguments for GPE
    G_dimm_sm_args.error.error = 0;
    G_dimm_sm_args.state = i_state;

    DIMM_DBG("dimm_sm: Scheduling GPE1 DIMM I2C state 0x%02X (tick %d)", i_state, DIMM_TICK);
    const int l_sched_rc = gpe_request_schedule(&G_dimm_sm_request);
    if (0 == l_sched_rc)
    {
        return true;
    }

    // Scheduling failed: trace the rc together with the current timebase in us
    INTR_TRAC_ERR("dimm_sm: schedule failed w/rc=0x%08X (%d us)",
                  l_sched_rc, (int) ((ssx_timebase_get())/(SSX_TIMEBASE_FREQUENCY_HZ/1000000)));
    /*
     * @errortype
     * @moduleid    DIMM_MID_DIMM_SM
     * @reasoncode  SSX_GENERIC_FAILURE
     * @userdata1   GPE schedule returned rc code
     * @userdata2   state
     * @devdesc     dimm_sm schedule failed
     */
    errlHndl_t l_sched_err = createErrl(DIMM_MID_DIMM_SM,
                                        SSX_GENERIC_FAILURE,
                                        ERC_DIMM_SCHEDULE_FAILURE,
                                        ERRL_SEV_PREDICTIVE,
                                        NULL,
                                        DEFAULT_TRACE_SIZE,
                                        l_sched_rc,
                                        i_state);
    // Request reset since this should never happen.
    REQUEST_RESET(l_sched_err);

    return false;

} // end schedule_dimm_req()
Beispiel #5
0
// Function Specification
//
// Name: task_dcom_tx_slv_outbox
//
// Description: Copy slave outboxes from SRAM to main memory
//              so slave can send data to master
//
// Task Flags:  RTL_FLAG_NONMSTR, RTL_FLAG_MSTR, RTL_FLAG_OBS, RTL_FLAG_ACTIVE,
//              RTL_FLAG_NOAPSS, RTL_FLAG_RUN, RTL_FLAG_MSTR_READY
//
// End Function Specification
void task_dcom_tx_slv_outbox( task_t *i_self)
{
    static bool l_error = FALSE;
    uint32_t    l_orc = OCC_SUCCESS_REASON_CODE;
    uint32_t    l_orc_ext = OCC_NO_EXTENDED_RC;
    // Use a static local bool to track whether the BCE request used
    // here has ever been successfully created at least once
    static bool L_bce_slv_outbox_tx_request_created_once = FALSE;

    DCOM_DBG("3. TX Slave Outboxes\n");

    do
    {
        // Build/setup outbox
        uint32_t l_addr_in_mem = dcom_build_slv_outbox();
        uint32_t l_ssxrc = 0;

        // See dcomMasterRx.c/task_dcom_rx_slv_outboxes for details on the
        // checking done here before creating and scheduling the request.
        bool l_proceed_with_request_and_schedule = FALSE;
        int l_req_idle = async_request_is_idle(&(G_slv_outbox_tx_pba_request.request));
        int l_req_complete = async_request_completed(&(G_slv_outbox_tx_pba_request.request));

        if (!L_bce_slv_outbox_tx_request_created_once)
        {
            // Do this case first, all other cases assume that this is
            // true!
            // This is the first time we have created a request so
            // always proceed with request create and schedule
            l_proceed_with_request_and_schedule = TRUE;
        }
        else if (l_req_idle && l_req_complete)
        {
            // Most likely case first.  The request was created
            // and scheduled and has completed without error.  Proceed.
            // Proceed with request create and schedule.
            l_proceed_with_request_and_schedule = TRUE;
        }
        else if (l_req_idle && !l_req_complete)
        {
            // There was an error on the schedule request or the request
            // was scheduled but was canceled, killed or errored out.
            // Proceed with request create and schedule.
            l_proceed_with_request_and_schedule = TRUE;

            // Trace important information from the request
            TRAC_INFO("BCE slv outbox tx request idle but not complete, \
                      callback_rc=%d options=0x%x state=0x%x abort_state=0x%x \
                      completion_state=0x%x",
                      G_slv_outbox_tx_pba_request.request.callback_rc,
                      G_slv_outbox_tx_pba_request.request.options,
                      G_slv_outbox_tx_pba_request.request.state,
                      G_slv_outbox_tx_pba_request.request.abort_state,
                      G_slv_outbox_tx_pba_request.request.completion_state);
            TRAC_INFO("Proceeding with BCE slv outbox tx request and schedule");
        }
        else if (!l_req_idle && !l_req_complete)
        {
            // The request was created and scheduled but is still in
            // progress or still enqueued OR there was some error
            // creating the request so it was never scheduled.  The latter
            // case is unlikely and will generate an error message when
            // it occurs.  It will also have to happen after the request
            // was created at least once or we'll never get here.  If the
            // request does fail though before the state parms in the
            // request are reset (like a bad parameter error), then this
            // represents a hang condition that we can't recover from.
            // DO NOT proceed with request create and schedule.
            l_proceed_with_request_and_schedule = FALSE;

            // Trace important information from the request
            TRAC_INFO("BCE slv outbox tx request not idle and not complete, \
                      callback_rc=%d options=0x%x state=0x%x abort_state=0x%x \
                      completion_state=0x%x",
                      G_slv_outbox_tx_pba_request.request.callback_rc,
                      G_slv_outbox_tx_pba_request.request.options,
                      G_slv_outbox_tx_pba_request.request.state,
                      G_slv_outbox_tx_pba_request.request.abort_state,
                      G_slv_outbox_tx_pba_request.request.completion_state);
            TRAC_INFO("NOT proceeding with BCE slv outbox tx request and schedule");
        }
Beispiel #6
0
void amec_update_fw_sensors(void)
{
    errlHndl_t l_err                = NULL;
    int rc                          = 0;
    int rc2                         = 0;
    static bool l_first_call        = TRUE;
    bool l_gpe0_idle, l_gpe1_idle;
    static int L_consec_trace_count = 0;

    // ------------------------------------------------------
    // Update OCC Firmware Sensors from last tick
    // ------------------------------------------------------
    int l_last_state = G_fw_timing.amess_state;
    // RTLtickdur    = duration of last tick's RTL ISR (max = 250us)
    sensor_update( AMECSENSOR_PTR(RTLtickdur), G_fw_timing.rtl_dur);
    // AMEintdur     = duration of last tick's AMEC portion of RTL ISR
    sensor_update( AMECSENSOR_PTR(AMEintdur), G_fw_timing.ameint_dur);
    // AMESSdurX     = duration of last tick's AMEC state
    if(l_last_state >= NUM_AMEC_SMH_STATES)
    {
        // Sanity check.  Trace this out, even though it should never happen.
        TRAC_INFO("AMEC State Invalid, Sensor Not Updated");
    }
    else
    {
        // AMESSdurX = duration of last tick's AMEC state
        sensor_update( AMECSENSOR_ARRAY_PTR(AMESSdur0, l_last_state),  G_fw_timing.amess_dur);
    }

    // ------------------------------------------------------
    // Kick off GPE programs to track WorstCase time in GPE
    // and update the sensors.
    // ------------------------------------------------------
    if( (NULL != G_fw_timing.gpe0_timing_request)
        && (NULL != G_fw_timing.gpe1_timing_request) )
    {
        //Check if both GPE engines were able to complete the last GPE job on
        //the queue within 1 tick.
        l_gpe0_idle = async_request_is_idle(&G_fw_timing.gpe0_timing_request->request);
        l_gpe1_idle = async_request_is_idle(&G_fw_timing.gpe1_timing_request->request);
        if(l_gpe0_idle && l_gpe1_idle)
        {
            //reset the consecutive trace count
            L_consec_trace_count = 0;

            //Both GPE engines finished on time. Now check if they were
            //successful too.
            if( async_request_completed(&(G_fw_timing.gpe0_timing_request->request))
                && async_request_completed(&(G_fw_timing.gpe1_timing_request->request)) )
            {
                // GPEtickdur0 = duration of last tick's PORE-GPE0 duration
                sensor_update( AMECSENSOR_PTR(GPEtickdur0), G_fw_timing.gpe_dur[0]);
                // GPEtickdur1 = duration of last tick's PORE-GPE1 duration
                sensor_update( AMECSENSOR_PTR(GPEtickdur1), G_fw_timing.gpe_dur[1]);
            }
            else
            {
                //This case is expected on the first call of the function.
                //After that, this should not happen.
                if(!l_first_call)
                {
                    //Note: FFDC for this case is gathered by each task
                    //responsible for a GPE job.
                    TRAC_INFO("GPE task idle but GPE task did not complete");
                }
                l_first_call = FALSE;
            }

            // Update Time used to measure GPE duration.
            G_fw_timing.rtl_start_gpe = G_fw_timing.rtl_start;

            // Schedule the GPE Routines that will run and update the worst
            // case timings (via callback) after they complete.  These GPE
            // routines are the last GPE routines added to the queue
            // during the RTL tick.
            rc  = pore_flex_schedule(G_fw_timing.gpe0_timing_request);
            rc2 = pore_flex_schedule(G_fw_timing.gpe1_timing_request);

            if(rc || rc2)
            {
                /* @
                 * @errortype
                 * @moduleid    AMEC_UPDATE_FW_SENSORS
                 * @reasoncode  SSX_GENERIC_FAILURE
                 * @userdata1   return code - gpe0
                 * @userdata2   return code - gpe1
                 * @userdata4   OCC_NO_EXTENDED_RC
                 * @devdesc     Failure to schedule PORE-GPE poreFlex object for FW timing
                 *              analysis.
                 */
                l_err = createErrl(
                    AMEC_UPDATE_FW_SENSORS,             //modId
                    SSX_GENERIC_FAILURE,                //reasoncode
                    OCC_NO_EXTENDED_RC,                 //Extended reason code
                    ERRL_SEV_INFORMATIONAL,             //Severity
                    NULL,                               //Trace Buf
                    DEFAULT_TRACE_SIZE,                 //Trace Size
                    rc,                                 //userdata1
                    rc2);                               //userdata2

                // commit error log
                commitErrl( &l_err );
            }
        }
        else if(L_consec_trace_count < MAX_CONSEC_TRACE)
        {
            uint64_t l_dbg1;

            // Reset will eventually be requested due to not having power measurement
            // data after X ticks, but add some additional FFDC to the trace that
            // will tell us what GPE job is currently executing.
            if(!l_gpe0_idle)
            {
                l_dbg1 = in64(PORE_GPE0_DBG1);
                TRAC_ERR("GPE0 programs did not complete within one tick. DBG1[0x%08x%08x]",
                          l_dbg1 >> 32,
                          l_dbg1 & 0x00000000ffffffffull);
            }