예제 #1
0
파일: homer.c 프로젝트: JoeYang4/occ
/*
 * Function Specification
 *
 * Name: homer_log_access_error
 *
 * Description: Creates and commits an error log for a failed HOMER access.
 *              Nothing is logged when i_homer_rc is HOMER_SUCCESS.  When the
 *              SSX return code is also bad, a predictive log carrying both
 *              return codes is committed; otherwise an informational log
 *              carrying only the HOMER return code is committed.
 *              i_usr_data2 is passed through as user data word 2.
 *
 * End Function Specification
 */
void homer_log_access_error(const homer_rc_t i_homer_rc,
                            const int i_ssx_rc,
                            const uint32_t i_usr_data2)
{
    errlHndl_t l_errl = NULL;

    // Only a failed HOMER access is reported here
    if (HOMER_SUCCESS == i_homer_rc)
    {
        return;
    }

    if (SSX_OK == i_ssx_rc)
    {
        // HOMER-only failure: no SSX error accompanies it
        /* @
         * @errortype
         * @moduleid    MAIN_MID
         * @reasoncode  INTERNAL_FAILURE
         * @userdata1   HOMER return code
         * @userdata2   Default host interrupt type used.
         * @userdata4   ERC_HOMER_MAIN_ACCESS_ERROR
         * @devdesc     Error accessing initialization data
         */
        l_errl = createErrl(MAIN_MID,                    //modId
                            INTERNAL_FAILURE,            //reasoncode
                            ERC_HOMER_MAIN_ACCESS_ERROR, //Extended reason code
                            ERRL_SEV_INFORMATIONAL,      //Severity
                            NULL,                        //Trace Buf
                            DEFAULT_TRACE_SIZE,          //Trace Size
                            i_homer_rc,                  //userdata1
                            i_usr_data2);                //userdata2
    }
    else
    {
        // Both an internal HOMER error and an SSX error were reported, e.g.
        // an unsupported version number was found in the HOMER and then
        // unmapping the HOMER address space failed as well.  Pack the HOMER
        // rc above the low 16 bits of the SSX rc.
        const uint32_t l_both_rcs =
            (i_homer_rc << 16) | (0xFFFF & (uint32_t)i_ssx_rc);

        /* @
         * @errortype
         * @moduleid    MAIN_MID
         * @reasoncode  SSX_GENERIC_FAILURE
         * @userdata1   HOMER and SSX return codes
         * @userdata2   Host interrupt type used
         * @userdata4   ERC_HOMER_MAIN_SSX_ERROR
         * @devdesc     An SSX error occurred mapping the HOMER host data
         *              into the OCC address space. User word 1 contains
         *              both the internal and SSX return codes returned
         *              by the method used to access the HOMER data.
         */
        l_errl = createErrl(MAIN_MID,                 //modId
                            SSX_GENERIC_FAILURE,      //reasoncode
                            ERC_HOMER_MAIN_SSX_ERROR, //Extended reason code
                            ERRL_SEV_PREDICTIVE,      //Severity
                            NULL,                     //Trace Buf
                            DEFAULT_TRACE_SIZE,       //Trace Size
                            l_both_rcs,               //userdata1
                            i_usr_data2);             //userdata2
    }

    commitErrl(&l_errl);
}
예제 #2
0
// Function Specification
//
//  Name: apssInitApplet
//
//  Description: Applet entry point that initializes the APSS.  If the first
//               attempt fails, its error log is downgraded to informational
//               and committed, then initialization is attempted once more.
//
//  Returns: NULL when initialization succeeded, otherwise the error log
//           produced by the second attempt (it is not committed here).
//
// End Function Specification
errlHndl_t apssInitApplet(void * i_arg)
{
    // First attempt
    errlHndl_t l_errl = apss_initialize();

    if (NULL == l_errl)
    {
        return l_errl;
    }

    // First attempt failed: record it as informational, then try again
    TRAC_ERR("APSS Init failed! (retrying) ErrLog[%p]", l_errl);
    setErrlSevToInfo(l_errl);
    commitErrl(&l_errl);

    // Second and final attempt
    l_errl = apss_initialize();
    if (NULL != l_errl)
    {
        TRAC_ERR("APSS Init failed again! ErrLog[%p]",l_errl);
    }

    return l_errl;
}
예제 #3
0
파일: errlTest.c 프로젝트: deece/occ
// Function Specification
//
// Name: errlTestTime
//
// Description: Checks that the time stamp stored in an error log lies
//              between a timebase reading taken before the log was created
//              and one taken after it was committed.
//
// Returns: l_rc, which starts at 0 and is handed to CHECK_CONDITION for
//          each check (macro defined elsewhere).
//
// End Function Specification
uint32_t errlTestTime()
{
    uint32_t l_rc = 0;

    do
    {
        ERRL_DBG("START");

        // Timebase reading taken before the log exists
        uint64_t l_before = ssx_timebase_get();

        // Create one log
        errlHndl_t l_errl = createErrl( 0x1716,
                                        0x08,
                                        OCC_NO_EXTENDED_RC,
                                        ERRL_SEV_CALLHOME_DATA,
                                        g_trac_inf,
                                        128,
                                        0x1,
                                        0x2 );
        CHECK_CONDITION( l_errl != INVALID_ERR_HNDL, l_rc);

        // Keep a second handle to the log, then commit through the first one
        errlHndl_t l_errlCopy = l_errl;
        commitErrl( &l_errl );

        // Timebase reading taken after the commit
        uint64_t l_after = ssx_timebase_get();

        // The recorded time stamp must fall inside [l_before, l_after]
        CHECK_CONDITION( (l_errlCopy->iv_userDetails.iv_timeStamp >= l_before) &&
                         (l_errlCopy->iv_userDetails.iv_timeStamp <= l_after ), l_rc);

        deleteErrl(&l_errlCopy);
        ERRL_DBG("END \n");

    }while(0);

    return l_rc;
}
예제 #4
0
파일: errlTest.c 프로젝트: deece/occ
// Function Specification
//
// Name: errlTestSetErrlSevToInfo
//
// Description: Exercises setErrlSevToInfo in two scenarios:
//              1) on an uncommitted predictive log with one callout, where
//                 the callout count is expected to drop to 0 and the
//                 severity to become ERRL_SEV_INFORMATIONAL;
//              2) after the log has been committed, where the log is
//                 expected to keep ERRL_SEV_PREDICTIVE severity.
//
// Returns: l_rc, which starts at 0 and is handed to CHECK_CONDITION for
//          each check (macro defined elsewhere).
//
// End Function Specification
uint32_t errlTestSetErrlSevToInfo()
{
    uint32_t l_rc = 0;
    ERRL_DBG("START");

    do
    {
        /****************************************************/
        // Scenario 1: setErrlSevToInfo on an uncommitted log
        errlHndl_t l_errl = createErrl( TEST_MODULE_ID,
                                        0x08,
                                        OCC_NO_EXTENDED_RC,
                                        ERRL_SEV_PREDICTIVE,
                                        g_trac_inf,
                                        128,
                                        0x1,
                                        0x2 );
        CHECK_CONDITION( l_errl != INVALID_ERR_HNDL, l_rc);

        // Give the log a single callout
        addCalloutToErrl(l_errl,
                         ERRL_CALLOUT_TYPE_HUID,
                         0x00,
                         ERRL_CALLOUT_PRIORITY_LOW);
        CHECK_CONDITION( l_errl->iv_numCallouts == 1, l_rc);

        // Downgrade: callouts should be cleared and the severity should
        // become ERRL_SEV_INFORMATIONAL
        setErrlSevToInfo(l_errl);
        CHECK_CONDITION( (l_errl->iv_numCallouts == 0) &&
                         (l_errl->iv_severity == ERRL_SEV_INFORMATIONAL), l_rc);

        deleteErrl( &l_errl );
        ppdumpslot();

        /****************************************************/
        // Scenario 2: setErrlSevToInfo after the log is committed
        l_errl = createErrl( TEST_MODULE_ID,
                             0x08,
                             OCC_NO_EXTENDED_RC,
                             ERRL_SEV_PREDICTIVE,
                             g_trac_inf,
                             128,
                             0x1,
                             0x2 );
        CHECK_CONDITION( l_errl != INVALID_ERR_HNDL, l_rc);

        // Second handle used to inspect and delete the log after the commit
        errlHndl_t l_committed = l_errl;

        // Give the log a single callout
        addCalloutToErrl(l_errl,
                         ERRL_CALLOUT_TYPE_HUID,
                         0x00,
                         ERRL_CALLOUT_PRIORITY_LOW);
        CHECK_CONDITION( l_errl->iv_numCallouts == 1, l_rc);

        // Commit, then call setErrlSevToInfo with the handle that was given
        // to commitErrl; the call is expected to do nothing
        commitErrl( &l_errl );
        setErrlSevToInfo(l_errl);
        CHECK_CONDITION( (l_committed->iv_numCallouts == ERRL_MAX_CALLOUTS) &&
                         (l_committed->iv_severity == ERRL_SEV_PREDICTIVE), l_rc);

        deleteErrl(&l_committed);
        ERRL_DBG("END \n");

    }while(0);

    return l_rc;
}
예제 #5
0
파일: dimm.c 프로젝트: ploetzma/occ
// Called after a failure to read a DIMM temperature.  The error will
// be counted and if threshold is reached, and error will be created with
// the DIMM as a callout and then set flag to trigger I2C reset
void mark_dimm_failed()
{
    const uint8_t port = G_dimm_sm_args.i2cPort;
    const uint8_t dimm = G_dimm_sm_args.dimm;
    INTR_TRAC_ERR("mark_dimm_failed: DIMM%04X failed in state/rc/count=0x%06X "
                  "(ffdc 0x%08X%08X, completion_state 0x%02X)",
                  DIMM_AND_PORT, (G_dimm_sm_args.state << 16) | (G_dimm_sm_args.error.rc << 8) | G_dimm[port][dimm].errorCount,
                  WORD_HIGH(G_dimm_sm_args.error.ffdc),
                  WORD_LOW(G_dimm_sm_args.error.ffdc),
                  G_dimm_sm_request.request.completion_state);

    if (++G_dimm[port][dimm].errorCount > MAX_CONSECUTIVE_DIMM_RESETS)
    {
        // Disable collection on this DIMM, collect FFDC and log error
        G_dimm[port][dimm].disabled = true;
        INTR_TRAC_ERR("mark_dimm_failed: disabling DIMM%04X due to %d consecutive errors (state=%d)",
                      DIMM_AND_PORT, G_dimm[port][dimm].errorCount, G_dimm_sm_args.state);
        errlHndl_t l_err = NULL;
        /*
         * @errortype
         * @moduleid    DIMM_MID_MARK_DIMM_FAILED
         * @reasoncode  DIMM_GPE_FAILURE
         * @userdata1   GPE returned rc code
         * @userdata4   ERC_DIMM_COMPLETE_FAILURE
         * @devdesc     Failure writing dimm i2c mode register
         */
        l_err = createErrl(DIMM_MID_MARK_DIMM_FAILED,
                           DIMM_GPE_FAILURE,
                           ERC_DIMM_COMPLETE_FAILURE,
                           ERRL_SEV_INFORMATIONAL,
                           NULL,
                           DEFAULT_TRACE_SIZE,
                           G_dimm_sm_args.error.rc,
                           0);
        addUsrDtlsToErrl(l_err,
                         (uint8_t*)&G_dimm_sm_request.ffdc,
                         sizeof(G_dimm_sm_request.ffdc),
                         ERRL_STRUCT_VERSION_1,
                         ERRL_USR_DTL_BINARY_DATA);
        addCalloutToErrl(l_err,
                         ERRL_CALLOUT_TYPE_HUID,
                         G_sysConfigData.dimm_huids[port][dimm],
                         ERRL_CALLOUT_PRIORITY_HIGH);
        commitErrl(&l_err);
    }

    // Reset DIMM I2C engine
    G_dimm_i2c_reset_required = true;
    G_dimm_i2c_reset_cause = port<<24 | dimm<<16 | (G_dimm_sm_args.error.rc & 0xFFFF);
    G_dimm_state = DIMM_STATE_RESET_MASTER;

} // end mark_dimm_failed()
예제 #6
0
파일: errlTest.c 프로젝트: deece/occ
// Function Specification
//
// Name: errlTestWordAlign
//
// Description: Checks how much iv_entrySize grows when data of an unaligned
//              length is added to an error log: 13 bytes of user details
//              must grow the entry by exactly one ErrlUserDetailsEntry_t
//              header plus 16 bytes, and 21 bytes of trace by at most one
//              header plus 24 bytes.
//
// Returns: l_rc, which starts at 0 and is handed to CHECK_CONDITION for
//          each check (macro defined elsewhere).
//
// End Function Specification
uint32_t errlTestWordAlign()
{
    uint32_t l_rc = 0;
    uint16_t l_sizePre = 0;     // entry size before data is added
    uint16_t l_sizePost = 0;    // entry size after data is added
    ERRL_DBG("START");

    do
    {
        /****************************************************/
        // Part 1: word alignment in addUsrDtlsToErrl
        errlHndl_t l_errl = createErrl( TEST_MODULE_ID,
                                        0x08,
                                        OCC_NO_EXTENDED_RC,
                                        ERRL_SEV_PREDICTIVE,
                                        NULL,
                                        0,
                                        0x1,
                                        0x2 );
        CHECK_CONDITION( l_errl != INVALID_ERR_HNDL, l_rc);

        // commitErrl sets l_errl to NULL, so keep a second handle for the
        // delete at the end
        errlHndl_t l_errlCopy = l_errl;
        ppdumpslot();

        // Add 13 bytes of "user details"
        l_sizePre = l_errl->iv_userDetails.iv_entrySize;
        memset( G_data, 0xAA, sizeof( G_data ) );
        addUsrDtlsToErrl( l_errl,
                          G_data,
                          13,
                          ERRL_USR_DTL_STRUCT_VERSION_1,
                          ERRL_USR_DTL_TRACE_DATA );
        l_sizePost = l_errl->iv_userDetails.iv_entrySize;
        ERRL_DBG("Slots after create + 13 bytes" );
        ppdumpslot();
        // Growth must equal header + WORDALIGN(13)
        CHECK_CONDITION( l_sizePost == (l_sizePre+sizeof(ErrlUserDetailsEntry_t)+16), l_rc);

        /****************************************************/
        // Part 2: word alignment in addTraceToErrl
        // Add 21 bytes of trace
        l_sizePre = l_errl->iv_userDetails.iv_entrySize;
        addTraceToErrl(g_trac_inf, 21, l_errl);
        l_sizePost = l_errl->iv_userDetails.iv_entrySize;
        ERRL_DBG("Slots after create + 21 bytes" );
        ppdumpslot();
        // Growth must not exceed header + WORDALIGN(21)
        CHECK_CONDITION( l_sizePost <= (l_sizePre+sizeof(ErrlUserDetailsEntry_t)+24), l_rc);

        // Clean up: commit through one handle, delete through the other
        commitErrl( &l_errl );
        deleteErrl(&l_errlCopy);
        ERRL_DBG("Slots should now be empty");
        ppdumpslot();
        ERRL_DBG("END \n");
    }while(0);

    return l_rc;
}
예제 #7
0
파일: errlTest.c 프로젝트: deece/occ
// Function Specification
//
// Name: errlTestCreateCommitDeleteLog
//
// Description: Walks one call-home error log through its life cycle and
//              checks each step: creation yields a valid handle, commit
//              NULLs the caller's handle and marks the log committed, and
//              delete NULLs the remaining handle.
//
// Returns: l_rc, which starts at 0 and is handed to CHECK_CONDITION for
//          each check (macro defined elsewhere).
//
// End Function Specification
uint32_t errlTestCreateCommitDeleteLog()
{
    uint32_t l_rc = 0;
    ERRL_DBG("START");

    do
    {
        /****************************************************/
        // Step 1: create
        errlHndl_t l_errl = createErrl( TEST_MODULE_ID,
                                        0x08,
                                        OCC_NO_EXTENDED_RC,
                                        ERRL_SEV_CALLHOME_DATA,
                                        g_trac_inf,
                                        512,
                                        0x1,
                                        0x2 );
        CHECK_CONDITION( l_errl != INVALID_ERR_HNDL, l_rc);

        ERRL_DBG("Slots after Creating call home log" );
        ppdumpslot();

        /****************************************************/
        // Step 2: commit, keeping a second handle so the log can still be
        // inspected afterwards
        errlHndl_t l_errlCopy = l_errl;
        commitErrl( &l_errl );
        CHECK_CONDITION( (l_errl == NULL) &&
                         (l_errlCopy->iv_userDetails.iv_committed == 1), l_rc);

        ERRL_DBG("Slots after Commiting call home log" );
        dumpLog( l_errlCopy, l_errlCopy->iv_userDetails.iv_entrySize );
        ppdumpslot();

        /****************************************************/
        // Step 3: delete through the second handle
        deleteErrl(&l_errlCopy);
        CHECK_CONDITION( l_errlCopy == NULL, l_rc);

        ERRL_DBG("Slots after delete Log" );
        ppdumpslot();

        ERRL_DBG("END \n");

    }while(0);

    return l_rc;
}
예제 #8
0
// Function Specification
//
// Name:  cmdh_mnfg_get_sensor
//
// Description: Handles the manufacturing "get sensor" command.  Validates
//              the command (pointers, data length, version), queries the
//              sensor identified by the GSID in the command and fills the
//              response with the sensor's readings and its static
//              information (name, units, frequency, scale factor, location
//              and type).
//
// Parameters:
//   i_cmd_ptr - command packet, read as a cmdh_mfg_get_sensor_query_t
//   o_rsp_ptr - response packet, written as a cmdh_mfg_get_sensor_resp_t
//
// Returns: ERRL_RC_SUCCESS on success, otherwise ERRL_RC_INTERNAL_FAIL,
//          ERRL_RC_INVALID_CMD_LEN or ERRL_RC_INVALID_DATA.  The same value
//          is stored in G_rsp_status.
//
// End Function Specification
uint8_t cmdh_mnfg_get_sensor(const cmdh_fsp_cmd_t * i_cmd_ptr,
                             cmdh_fsp_rsp_t * o_rsp_ptr)
{
    uint8_t                         l_rc = ERRL_RC_SUCCESS;
    uint16_t                        l_gsid;
    uint16_t                        l_resp_data_length = 0;
    uint16_t                        l_datalength;
    uint16_t                        l_num_of_sensors = 1;
    cmdh_mfg_get_sensor_query_t     *l_cmd_ptr =
                                    (cmdh_mfg_get_sensor_query_t*) i_cmd_ptr;
    cmdh_mfg_get_sensor_resp_t      *l_resp_ptr =
                                    (cmdh_mfg_get_sensor_resp_t*) o_rsp_ptr;
    sensor_info_t                   l_sensor_info;
    errlHndl_t                      l_err = NULL;
    sensor_t*                       l_sensor_ptr;

    do
    {
        // Do sanity check on the function inputs
        if ((NULL == i_cmd_ptr) || (NULL == o_rsp_ptr))
        {
            TRAC_ERR("cmdh_mnfg_get_sensor: invalid pointers. cmd[0x%08x] rsp[0x%08x]",
                     (uint32_t) i_cmd_ptr, (uint32_t) o_rsp_ptr);
            l_rc = ERRL_RC_INTERNAL_FAIL;
            break;
        }

        // Check packet data length: the payload must be at least as large as
        // the query structure minus the common command header
        l_datalength = CMDH_DATALEN_FIELD_UINT16(i_cmd_ptr);
        if(l_datalength < (sizeof(cmdh_mfg_get_sensor_query_t) -
                          sizeof(cmdh_fsp_cmd_header_t)))
        {
            TRAC_ERR("cmdh_mnfg_get_sensor: incorrect data length. exp[%d] act[%d]",
                     (sizeof(cmdh_mfg_get_sensor_query_t) -
                      sizeof(cmdh_fsp_cmd_header_t)),
                     l_datalength);
            l_rc = ERRL_RC_INVALID_CMD_LEN;
            break;
        }

        // Check version against the "get sensor" version, which is also the
        // value reported as expected in the trace below
        if(l_cmd_ptr->version != MFG_GET_SENSOR_VERSION)
        {
            TRAC_ERR("cmdh_mnfg_get_sensor: incorrect version. exp[%d] act[%d]",
                     MFG_GET_SENSOR_VERSION,
                     l_cmd_ptr->version);
            l_rc = ERRL_RC_INVALID_DATA;
            break;
        }

        // Capture user inputs
        l_gsid = l_cmd_ptr->gsid;

        TRAC_INFO("cmdh_mnfg_get_sensor: gsid[0x%04x]", l_gsid);

        // Initialize the sensor query arguments: ask for a single sensor
        // starting at the requested GSID, with no type/location filtering
        querySensorListArg_t l_qsl_arg =
        {
            l_gsid,                 // i_startGsid - passed by the caller
            0,                      // i_present - passed by the caller
            AMEC_SENSOR_TYPE_ALL,   // i_type
            AMEC_SENSOR_LOC_ALL,    // i_loc
            &l_num_of_sensors,      // io_numOfSensors
            NULL,                   // o_sensors - not needed
            &l_sensor_info          // o_sensorInfoPtr
        };

        // Get the sensor list
        l_err = querySensorList(&l_qsl_arg);

        if (NULL != l_err)
        {
            // Query failure
            TRAC_ERR("cmdh_mnfg_get_sensor: Failed to get sensor list. Error status is: 0x%x",
                     l_err->iv_reasonCode);

            // Commit error log
            commitErrl(&l_err);
            l_rc = ERRL_RC_INTERNAL_FAIL;
            break;
        }

        l_resp_ptr->gsid = l_gsid;

        // Some of the response comes from the sensor
        l_sensor_ptr = getSensorByGsid(l_gsid);
        if (l_sensor_ptr == NULL)
        {
            // No sensor object: report placeholder readings
            TRAC_INFO("cmdh_mnfg_get_sensor: Didn't find sensor with gsid[0x%.4X]. Min/Max values won't be accurate.",
                      l_gsid);
            l_resp_ptr->sample = 0;
            l_resp_ptr->min = 0xFFFF;
            l_resp_ptr->max = 0;
            l_resp_ptr->accumulator = 0;
            l_resp_ptr->status = 0;
        }
        else
        {
            l_resp_ptr->sample = l_sensor_ptr->sample;
            l_resp_ptr->min = l_sensor_ptr->sample_min;
            l_resp_ptr->max = l_sensor_ptr->sample_max;
            // Truncate accumulator to 4 bytes (should not be used)
            l_resp_ptr->accumulator = (uint32_t)l_sensor_ptr->accumulator;
            // Only the first byte of the status field is reported
            l_resp_ptr->status = *(uint8_t*)(&l_sensor_ptr->status);
        }

        // The rest of the response comes from the sensor info
        memcpy(l_resp_ptr->name, l_sensor_info.name, sizeof(l_resp_ptr->name));
        memcpy(l_resp_ptr->units, l_sensor_info.sensor.units, sizeof(l_resp_ptr->units));
        l_resp_ptr->freq = l_sensor_info.sensor.freq;
        l_resp_ptr->scalefactor = l_sensor_info.sensor.scalefactor;
        l_resp_ptr->location = l_sensor_info.sensor.location;
        l_resp_ptr->type = l_sensor_info.sensor.type;

    }while(0);

    // Populate the response data header
    l_resp_data_length = sizeof(cmdh_mfg_get_sensor_resp_t) -
                         sizeof(cmdh_fsp_rsp_header_t);
    G_rsp_status = l_rc;

    // The sanity check above may have failed because o_rsp_ptr is NULL, so
    // the response header can only be written when the pointer is valid
    if (NULL != o_rsp_ptr)
    {
        // The two length bytes are copied in memory order
        o_rsp_ptr->data_length[0] = ((uint8_t *)&l_resp_data_length)[0];
        o_rsp_ptr->data_length[1] = ((uint8_t *)&l_resp_data_length)[1];
    }

    return l_rc;
}
예제 #9
0
// Function Specification
//
// Name:  cmdh_mnfg_list_sensors
//
// Description: Handles the manufacturing "list sensors" command.  Validates
//              the command (pointers, data length, version), queries the
//              sensors matching the requested type, location, presence and
//              starting GSID, and copies up to MFG_MAX_NUM_SENSORS of them
//              (gsid, sample, name) into the response.  When more sensors
//              match, the list is cut short and the response's 'truncated'
//              flag is set.
//
// Parameters:
//   i_cmd_ptr - command packet, read as a cmdh_mfg_list_sensors_query_t
//   o_rsp_ptr - response packet, written as a cmdh_mfg_list_sensors_resp_t
//
// Returns: ERRL_RC_SUCCESS on success, otherwise ERRL_RC_INTERNAL_FAIL,
//          ERRL_RC_INVALID_CMD_LEN or ERRL_RC_INVALID_DATA.  The same value
//          is stored in G_rsp_status.
//
// End Function Specification
uint8_t cmdh_mnfg_list_sensors(const cmdh_fsp_cmd_t * i_cmd_ptr,
                           cmdh_fsp_rsp_t * o_rsp_ptr)
{
    uint8_t                         l_rc = ERRL_RC_SUCCESS;
    uint16_t                        l_type = 0;
    uint16_t                        l_location = 0;
    uint16_t                        l_start_gsid;
    uint16_t                        i = 0;
    uint16_t                        l_resp_data_length = 0;
    uint16_t                        l_datalength;
    // One more than the maximum is requested so that an overflow of the
    // response list can be detected and flagged as truncated
    uint16_t                        l_num_of_sensors = MFG_MAX_NUM_SENSORS + 1;
    cmdh_mfg_list_sensors_query_t   *l_cmd_ptr =
                                    (cmdh_mfg_list_sensors_query_t*) i_cmd_ptr;
    cmdh_mfg_list_sensors_resp_t    *l_resp_ptr =
                                    (cmdh_mfg_list_sensors_resp_t*) o_rsp_ptr;
    sensorQueryList_t               l_sensor_list[MFG_MAX_NUM_SENSORS + 1];
    errlHndl_t                      l_err = NULL;

    do
    {
        // Do sanity check on the function inputs
        if ((NULL == i_cmd_ptr) || (NULL == o_rsp_ptr))
        {
            TRAC_ERR("cmdh_mnfg_list_sensors: invalid pointers. cmd[0x%08x] rsp[0x%08x]",
                     (uint32_t) i_cmd_ptr, (uint32_t) o_rsp_ptr);
            l_rc = ERRL_RC_INTERNAL_FAIL;
            break;
        }

        // Check packet data length: the payload must be at least as large as
        // the query structure minus the common command header
        l_datalength = CMDH_DATALEN_FIELD_UINT16(i_cmd_ptr);
        if(l_datalength < (sizeof(cmdh_mfg_list_sensors_query_t) -
                          sizeof(cmdh_fsp_cmd_header_t)))
        {
            TRAC_ERR("cmdh_mnfg_list_sensors: incorrect data length. exp[%d] act[%d]",
                     (sizeof(cmdh_mfg_list_sensors_query_t) -
                      sizeof(cmdh_fsp_cmd_header_t)),
                     l_datalength);
            l_rc = ERRL_RC_INVALID_CMD_LEN;
            break;
        }

        // Check version
        if(l_cmd_ptr->version != MFG_LIST_SENSOR_VERSION)
        {
            TRAC_ERR("cmdh_mnfg_list_sensors: incorrect version. exp[%d] act[%d]",
                     MFG_LIST_SENSOR_VERSION,
                     l_cmd_ptr->version);
            l_rc = ERRL_RC_INVALID_DATA;
            break;
        }

        // Capture user inputs
        l_type = l_cmd_ptr->type;
        l_location = l_cmd_ptr->location;
        l_start_gsid = l_cmd_ptr->start_gsid;

        TRAC_INFO("cmdh_mnfg_list_sensors: Type[0x%04x] Location[0x%04x]",
                  l_type,
                  l_location);

        // Initialize the sensor query arguments
        const querySensorListArg_t l_qsl_arg =
        {
            l_start_gsid,           // i_startGsid - passed by the caller
            l_cmd_ptr->present,     // i_present - passed by the caller
            l_type,                 // i_type - passed by the caller
            l_location,             // i_loc - passed by the caller
            &l_num_of_sensors,      // io_numOfSensors
            l_sensor_list,          // o_sensors
            NULL                    // o_sensorInfoPtr - not needed
        };

        // Get the list of sensors
        l_err = querySensorList(&l_qsl_arg);

        if (NULL != l_err)
        {
            // Query failure
            TRAC_ERR("cmdh_mnfg_list_sensors: Failed to query sensor list. Error status is: 0x%x",
                     l_err->iv_reasonCode);

            // Commit error log
            commitErrl(&l_err);
            l_rc = ERRL_RC_INTERNAL_FAIL;
            break;
        }

        TRAC_INFO("cmdh_mnfg_list_sensors: Numbers of sensors found[%u]",
                  l_num_of_sensors);

        if (l_num_of_sensors > MFG_MAX_NUM_SENSORS)
        {
            // Got too many sensors back, need to truncate the list
            TRAC_INFO("cmdh_mnfg_list_sensors: Got too many sensors back[%u]. Truncating number of sensors to %u",
                      l_num_of_sensors,
                      MFG_MAX_NUM_SENSORS);

            l_num_of_sensors = MFG_MAX_NUM_SENSORS;
            l_resp_ptr->truncated = 1;
        }
        else
        {
            l_resp_ptr->truncated = 0;
        }

        // Clear out the sensor fields.  The size is taken from the
        // destination element itself so the cleared span always matches the
        // records that are about to be filled in.
        memset((void*) &(l_resp_ptr->sensor[0]),
               0,
               sizeof(l_resp_ptr->sensor[0]) * l_num_of_sensors);

        // Populate the response data packet
        l_resp_ptr->num_sensors = l_num_of_sensors;
        for (i=0; i<l_num_of_sensors; i++)
        {
            l_resp_ptr->sensor[i].gsid = l_sensor_list[i].gsid;
            l_resp_ptr->sensor[i].sample = l_sensor_list[i].sample;
            // NOTE(review): unbounded copy; assumes the queried name is
            // NUL-terminated and fits the response field - confirm
            strcpy(l_resp_ptr->sensor[i].name, l_sensor_list[i].name);
        }

    }while(0);

    // Populate the response data header.
    // NOTE(review): the leading 2 presumably accounts for the 'truncated'
    // and 'num_sensors' fields that precede the records - confirm
    l_resp_data_length = 2 + l_num_of_sensors * sizeof(cmdh_mfg_sensor_rec_t);
    G_rsp_status = l_rc;

    // The sanity check above may have failed because o_rsp_ptr is NULL, so
    // the response header can only be written when the pointer is valid
    if (NULL != o_rsp_ptr)
    {
        // The two length bytes are copied in memory order
        o_rsp_ptr->data_length[0] = ((uint8_t *)&l_resp_data_length)[0];
        o_rsp_ptr->data_length[1] = ((uint8_t *)&l_resp_data_length)[1];
    }

    return l_rc;
}
예제 #10
0
파일: proc_pstate.c 프로젝트: deece/occ
// Function Specification
//
// Name:  populate_sapphire_tbl_to_mem
//
// Description: Copies G_sapphire_table from SRAM to main store at
//              SAPPHIRE_OFFSET_IN_HOMER with a blocking block-copy request.
//              If creating or scheduling the request fails, an
//              unrecoverable error log with a firmware callout is
//              committed.
//
// End Function Specification
void populate_sapphire_tbl_to_mem()
{
    int l_ssxrc = SSX_OK;
    uint32_t l_reasonCode = 0;
    uint32_t l_extReasonCode = 0;
    BceRequest l_bce_req;

#define SAPPHIRE_OFFSET_IN_HOMER 0x001F8000
    // Build the block-copy request
    l_ssxrc = bce_request_create(&l_bce_req,                         // block copy object
                                 &G_pba_bcue_queue,                  // sram to mainstore copy engine
                                 SAPPHIRE_OFFSET_IN_HOMER,           // mainstore address
                                 (uint32_t) &G_sapphire_table,       // sram starting address
                                 (size_t) sizeof(G_sapphire_table),  // size of copy
                                 SSX_WAIT_FOREVER,                   // no timeout
                                 NULL,                               // call back
                                 NULL,                               // call back arguments
                                 ASYNC_REQUEST_BLOCKING              // callback mask
                                 );

    if (SSX_OK != l_ssxrc)
    {
        TRAC_ERR("populate_sapphire_tbl_to_mem: PBA request create failure rc=[%08X]", -l_ssxrc);
        /*
         * @errortype
         * @moduleid    MAIN_STATE_TRANSITION_MID
         * @reasoncode  SSX_GENERIC_FAILURE
         * @userdata1   RC for PBA block-copy engine
         * @userdata4   ERC_BCE_REQUEST_CREATE_FAILURE
         * @devdesc     SSX BCE related failure
         */
        l_reasonCode = SSX_GENERIC_FAILURE;
        l_extReasonCode = ERC_BCE_REQUEST_CREATE_FAILURE;
    }
    else
    {
        // Request was created; now perform the actual copy
        l_ssxrc = bce_request_schedule(&l_bce_req);

        if (SSX_OK != l_ssxrc)
        {
            TRAC_ERR("populate_sapphire_tbl_to_mem: PBA request schedule failure rc=[%08X]", -l_ssxrc);
            /*
             * @errortype
             * @moduleid    MAIN_STATE_TRANSITION_MID
             * @reasoncode  SSX_GENERIC_FAILURE
             * @userdata1   RC for PBA block-copy engine
             * @userdata4   ERC_BCE_REQUEST_SCHEDULE_FAILURE
             * @devdesc     Failed to copy data by using DMA
             */
            l_reasonCode = SSX_GENERIC_FAILURE;
            l_extReasonCode = ERC_BCE_REQUEST_SCHEDULE_FAILURE;
        }
    }

    // Both steps succeeded: nothing to log
    if (SSX_OK == l_ssxrc)
    {
        return;
    }

    // One of the steps failed: log it with the negated SSX rc as userdata1
    errlHndl_t l_errl = createErrl(MAIN_STATE_TRANSITION_MID,    //modId
                                   l_reasonCode,                 //reasoncode
                                   l_extReasonCode,              //Extended reason code
                                   ERRL_SEV_UNRECOVERABLE,       //Severity
                                   NULL,                         //Trace Buf
                                   0,                            //Trace Size
                                   -l_ssxrc,                     //userdata1
                                   0);                           //userdata2

    // Callout firmware
    addCalloutToErrl(l_errl,
                     ERRL_CALLOUT_TYPE_COMPONENT_ID,
                     ERRL_COMPONENT_ID_FIRMWARE,
                     ERRL_CALLOUT_PRIORITY_HIGH);

    commitErrl(&l_errl);
}
예제 #11
0
파일: amec_freq.c 프로젝트: JoeYang4/occ
// Function Specification
//
// Name: amec_slv_check_perf
//
// Description: Slave OCC's Detect and log degraded performance errors
//              This function will run every tick.
//
// Thread: RealTime Loop
//
// Task Flags:
//
// End Function Specification
void amec_slv_check_perf(void)
{
    /*------------------------------------------------------------------------*/
    /*  Local Variables                                                       */
    /*------------------------------------------------------------------------*/
    static BOOLEAN          l_prev_failsafe_state = FALSE;
    static BOOLEAN          l_prev_ovs_state = FALSE;
    static BOOLEAN          l_prev_pcap_state = FALSE;
    static ERRL_SEVERITY    l_pcap_sev =  ERRL_SEV_PREDICTIVE;
    static BOOLEAN          l_throttle_traced = FALSE;
    static uint64_t         l_time = 0;

    /*------------------------------------------------------------------------*/
    /*  Code                                                                  */
    /*------------------------------------------------------------------------*/

    // Verify that cores are at proper frequency
    amec_verify_pstate();

    do
    {
        // was frequency limited by power ?
        if ( G_non_dps_power_limited != TRUE )
        {
            if(l_throttle_traced)
            {
                TRAC_INFO("Frequency not limited by power algorithms anymore");
                l_throttle_traced = FALSE;
            }
            // we are done break and return
            break;
        }

        // frequency limited due to failsafe condition ?
        if ( AMEC_INTF_GET_FAILSAFE() == TRUE )
        {
            if ( l_prev_failsafe_state == TRUE)
            {
                // we are done break and return
                break;
            }
            else
            {
                // log this error ONLY ONCE per IPL
                l_prev_failsafe_state = TRUE;

                TRAC_ERR("Frequency limited due to failsafe condition(mode:%d, state:%d)",
                          CURRENT_MODE(), CURRENT_STATE());
                l_throttle_traced = TRUE;
                l_time = ssx_timebase_get();

                // log error that calls out OVS procedure
                // set error severity to RRL_SEV_PREDICTIVE

                /* @
                 * @errortype
                 * @moduleid    AMEC_SLAVE_CHECK_PERFORMANCE
                 * @reasoncode  INTERNAL_FAILURE
                 * @userdata1   Previous FailSafe State
                 * @userdata4   ERC_AMEC_SLAVE_FAILSAFE_STATE
                 * @devdesc     Frequency limited due to failsafe condition
                 */
                errlHndl_t l_errl = createErrl(AMEC_SLAVE_CHECK_PERFORMANCE, //modId
                                              INTERNAL_FAILURE,             //reasoncode
                                              ERC_AMEC_SLAVE_FAILSAFE_STATE,//Extended reason code
                                              ERRL_SEV_PREDICTIVE,          //Severity
                                              NULL,                         //Trace Buf
                                              DEFAULT_TRACE_SIZE,           //Trace Size
                                              l_prev_failsafe_state,        //userdata1
                                              0);                           //userdata2

                addCalloutToErrl(   l_errl,
                                    ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                    ERRL_COMPONENT_ID_OVERSUBSCRIPTION,
                                    ERRL_CALLOUT_PRIORITY_HIGH
                                );

                // and sets the consolidate action flag
                setErrlActions( l_errl, ERRL_ACTIONS_CONSOLIDATE_ERRORS );

                // Commit Error
                commitErrl(&l_errl);

                // we are done lets break
                break;
            }
        }

        // frequency limited due to oversubscription condition ?
        if ( AMEC_INTF_GET_OVERSUBSCRIPTION() == TRUE )
        {
            if ( l_prev_ovs_state == TRUE)
            {
                // we are done break and return
                break;
            }
            else
            {
                // log this error ONLY ONCE per IPL
                l_prev_ovs_state = TRUE;

                TRAC_ERR("Frequency limited due to oversubscription condition(mode:%d, state:%d)",
                          CURRENT_MODE(), CURRENT_STATE());
                l_throttle_traced = TRUE;
                l_time = ssx_timebase_get();

                // log error that calls out OVS procedure
                // set error severity to RRL_SEV_PREDICTIVE

                // Updated the RC to match the actual RC passed to createErrl()
                /* @
                 * @errortype
                 * @moduleid    AMEC_SLAVE_CHECK_PERFORMANCE
                 * @reasoncode  OVERSUB_LIMIT_ALERT
                 * @userdata1   Previous OVS State
                 * @userdata4   ERC_AMEC_SLAVE_OVS_STATE
                 * @devdesc     Frequency limited due to oversubscription condition
                 */
                errlHndl_t l_errl = createErrl(AMEC_SLAVE_CHECK_PERFORMANCE, //modId
                                              OVERSUB_LIMIT_ALERT,           //reasoncode
                                              ERC_AMEC_SLAVE_OVS_STATE,      //Extended reason code
                                              ERRL_SEV_PREDICTIVE,           //Severity
                                              NULL,                          //Trace Buf
                                              DEFAULT_TRACE_SIZE,            //Trace Size
                                              l_prev_ovs_state,              //userdata1
                                              0);                            //userdata2

                // Callout to Oversubscription
                addCalloutToErrl(   l_errl,
                                    ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                    ERRL_COMPONENT_ID_OVERSUBSCRIPTION,
                                    ERRL_CALLOUT_PRIORITY_HIGH
                                );

                // Callout to APSS
                addCalloutToErrl(   l_errl,
                                    ERRL_CALLOUT_TYPE_HUID,
                                    G_sysConfigData.apss_huid,
                                    ERRL_CALLOUT_PRIORITY_MED
                                );

                // Callout to Firmware
                addCalloutToErrl(   l_errl,
                                    ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                    ERRL_COMPONENT_ID_FIRMWARE,
                                    ERRL_CALLOUT_PRIORITY_LOW
                                );

                // and sets the consolidate action flag
                setErrlActions( l_errl, ERRL_ACTIONS_CONSOLIDATE_ERRORS );

                // Commit Error
                commitErrl(&l_errl);

                // we are done lets break
                break;
            }
        }

        uint16_t l_snrBulkPwr = AMECSENSOR_PTR(PWR250US)->sample;

        // frequency limited due to system power cap condition ?
        if (( l_snrBulkPwr > (G_sysConfigData.pcap.system_pcap - PDROP_THRESH) )
            &&
            ( G_sysConfigData.pcap.current_pcap == 0 ))
        {
            if ( l_prev_pcap_state == TRUE)
            {
                // we are done break and return
                break;
            }
            else
            {
                //log this error ONLY ONCE per IPL
                l_prev_pcap_state = TRUE;

                TRAC_ERR("Frequency limited due to power cap condition(mode:%d, state:%d)",
                         CURRENT_MODE(), CURRENT_STATE());

                TRAC_ERR("SnrBulkPwr %d > Sys Pcap %d ",l_snrBulkPwr,
                         G_sysConfigData.pcap.system_pcap );

                TRAC_ERR("SnrFanPwr %d, SnrIOPwr %d, SnrStoragePwr %d, SnrGpuPrw %d ",
                        AMECSENSOR_PTR(PWR250USFAN)->sample,
                        AMECSENSOR_PTR(PWR250USIO)->sample,
                        AMECSENSOR_PTR(PWR250USSTORE)->sample,
                        AMECSENSOR_PTR(PWR250USGPU)->sample );

                TRAC_ERR("SnrProcPwr 0 %d, SnrProcPwr 1 %d, SnrProcPwr 2 %d, SnrProcPwr 3 %d",
                        g_amec->proc_snr_pwr[0],
                        g_amec->proc_snr_pwr[1],
                        g_amec->proc_snr_pwr[2],
                        g_amec->proc_snr_pwr[3] );

                TRAC_ERR("SnrMemPwr 0 %d, SnrMemPwr 1 %d, SnrMemPwr 2 %d, SnrMemPwr 3 %d",
                        g_amec->mem_snr_pwr[0],
                        g_amec->mem_snr_pwr[1],
                        g_amec->mem_snr_pwr[2],
                        g_amec->mem_snr_pwr[3] );


                l_throttle_traced = TRUE;
                l_time = ssx_timebase_get();

                // log error that calls out firmware and APSS procedure
                // set error severity to l_pcap_sev

                /* @
                 * @errortype
                 * @moduleid    AMEC_SLAVE_CHECK_PERFORMANCE
                 * @reasoncode  PCAP_THROTTLE_POWER_LIMIT
                 * @userdata1   Current Sensor Bulk Power
                 * @userdata2   System PCAP
                 * @userdata4   ERC_AMEC_SLAVE_POWERCAP
                 * @devdesc     Frequency limited due to PowerCap  condition
                 */
                errlHndl_t l_errl = createErrl(AMEC_SLAVE_CHECK_PERFORMANCE, //modId
                                              PCAP_THROTTLE_POWER_LIMIT,     //reasoncode
                                              ERC_AMEC_SLAVE_POWERCAP,       //Extended reason code
                                              l_pcap_sev,                    //Severity
                                              NULL,                          //Trace Buf
                                              DEFAULT_TRACE_SIZE,            //Trace Size
                                              l_snrBulkPwr,                  //userdata1
                                              G_sysConfigData.pcap.system_pcap);//userdata2

                addCalloutToErrl(   l_errl,
                                    ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                    ERRL_COMPONENT_ID_FIRMWARE,
                                    ERRL_CALLOUT_PRIORITY_HIGH
                                );

                addCalloutToErrl(   l_errl,
                                    ERRL_CALLOUT_TYPE_HUID,
                                    G_sysConfigData.apss_huid,
                                    ERRL_CALLOUT_PRIORITY_HIGH
                                );

                // and sets the consolidate action flag
                setErrlActions( l_errl, ERRL_ACTIONS_CONSOLIDATE_ERRORS );

                // then l_pcap_sev to informational
                l_pcap_sev = ERRL_SEV_INFORMATIONAL;

                // Commit Error
                commitErrl(&l_errl);

                // we are done lets break
                break;
            }
        }

        // trottle trace to every 3600 seconds (1hr = 3600000)
        if(!l_throttle_traced && ( DURATION_IN_MS_UNTIL_NOW_FROM(l_time) > 3600000 ) )
        {
            TRAC_INFO("Frequency power limited due to transient condition: PowerLimited=%x, FailSafe=%x, OverSubScription=%x CurrentBulkPwr=%x",
            G_non_dps_power_limited, AMEC_INTF_GET_FAILSAFE(), AMEC_INTF_GET_OVERSUBSCRIPTION(), l_snrBulkPwr );
            l_throttle_traced = TRUE;

            l_time = ssx_timebase_get();
        }
    }
    while( 0 );

    return;
}
예제 #12
0
// Function Specification
//
// Name:  Dcom_thread_routine
//
// Description: Thread body that applies the OCC state and mode requested
//              by the master (G_occ_master_state / G_occ_master_mode).
//
//              Each pass waits on G_dcomThreadWakeupSem, which is also
//              posted by a timer created here, so the thread still wakes
//              when nobody else posts the semaphore.
//
//              Each pass ends with a 250us sleep so the thread does not
//              run back-to-back when the semaphore is posted continually.
//
//              This routine never returns.
//
// End Function Specification
void Dcom_thread_routine(void *arg)
{
    OCC_STATE  l_state_req = 0;
    OCC_MODE   l_mode_req  = 0;
    SsxTimer   l_wakeup_timer;
    errlHndl_t l_err = NULL;
    int        l_mode_matches = 0;
    int        l_mode_first = 0;

    // --------------------------------------------------
    // Wakeup timer: its callback posts the thread semaphore.
    // Scheduled with SSX_SECONDS(10) for both timing arguments.
    // TODO: Is this really needed?
    // --------------------------------------------------
    ssx_timer_create(&l_wakeup_timer,
                     (SsxTimerCallback) ssx_semaphore_post,
                     (void *) &G_dcomThreadWakeupSem);
    ssx_timer_schedule(&l_wakeup_timer,
                       SSX_SECONDS(10),
                       SSX_SECONDS(10));

    while(1)
    {
        // --------------------------------------------------
        // Block until the semaphore is posted (new DCOM data
        // or the wakeup timer above)
        // --------------------------------------------------
        ssx_semaphore_pend(&G_dcomThreadWakeupSem, SSX_WAIT_FOREVER);

        // --------------------------------------------------
        // Counter to ensure thread is running (can wrap)
        // --------------------------------------------------
        G_dcom_thread_counter++;

        // --------------------------------------------------
        // On KVM systems, check for sapphire table updates
        // --------------------------------------------------
        if(G_sysConfigData.system_type.kvm)
        {
            proc_check_for_sapphire_updates();
        }

        // --------------------------------------------------
        // Work out the state requested by the master
        // --------------------------------------------------
        if(G_occ_master_state == CURRENT_STATE())
        {
            l_state_req = OCC_STATE_NOCHANGE;
        }
        else
        {
            l_state_req = G_occ_master_state;
        }

        // --------------------------------------------------
        // Work out the mode requested by the master. On KVM
        // the master mode is compared against the external
        // request mode rather than the current mode.
        // --------------------------------------------------
        if(G_sysConfigData.system_type.kvm)
        {
            l_mode_matches = (G_occ_master_mode  == G_occ_external_req_mode_kvm );
        }
        else
        {
            l_mode_matches = (G_occ_master_mode  == CURRENT_MODE() );
        }

        if(l_mode_matches)
        {
            l_mode_req = OCC_MODE_NOCHANGE;
        }
        else
        {
            l_mode_req = G_occ_master_mode;
        }

        // A pending safe-state request overrides the master's state
        if( isSafeStateRequested() )
        {
            l_state_req = OCC_STATE_SAFE;
        }

        // Once in SAFE state, no further state change is requested
        if( OCC_STATE_SAFE == CURRENT_STATE() )
        {
            l_state_req = OCC_STATE_NOCHANGE;
        }

        if( !( (OCC_STATE_NOCHANGE == l_state_req)
               && (OCC_MODE_NOCHANGE == l_mode_req) ) )
        {
            // If we're active, the mode change is processed first;
            // otherwise the state change is processed first.
            // The decision is taken once, before either call is made.
            l_mode_first = (OCC_STATE_ACTIVE == CURRENT_STATE());

            if(l_mode_first)
            {
                l_err = SMGR_set_mode(l_mode_req, 0 /* TODO V/F */ );
                if(l_err)
                {
                    commitErrl(&l_err);
                }
            }

            l_err = SMGR_set_state(l_state_req);
            if(l_err)
            {
                commitErrl(&l_err);
            }

            if(!l_mode_first)
            {
                l_err = SMGR_set_mode(l_mode_req, 0 /* TODO V/F */ );
                if(l_err)
                {
                    commitErrl(&l_err);
                }
            }
        }

        // --------------------------------------------------
        // DCM PStates
        // \_ can do sem_post to increment through state machine
        // --------------------------------------------------
        if(OCC_STATE_SAFE != CURRENT_STATE())
        {
            proc_gpsm_dcm_sync_enable_pstates_smh();
        }

        // --------------------------------------------------
        // Sleep 250us at the end of every pass so that other
        // threads are not starved when the semaphore is
        // posted continually
        // --------------------------------------------------
        ssx_sleep(SSX_MICROSECONDS(250));
    }
}
예제 #13
0
// Function Specification
//
// Name:  dbug_err_inject
//
// Description: Debug command handler that injects an error log.
//              When the component name in the command is "RST", a
//              predictive log with a processor callout is created and
//              handed to REQUEST_RESET. For any other name an
//              unrecoverable log is created using that component's trace
//              descriptor and committed.
//              The response data length is set to 0 and G_rsp_status
//              reports whether the log could be created.
//
// End Function Specification
void dbug_err_inject(const cmdh_fsp_cmd_t * i_cmd_ptr,
                           cmdh_fsp_rsp_t * i_rsp_ptr)
{
    cmdh_dbug_inject_errl_query_t *l_query = (cmdh_dbug_inject_errl_query_t*) i_cmd_ptr;
    errlHndl_t l_log;

    // No response payload; status stays SUCCESS unless log creation fails
    i_rsp_ptr->data_length[0] = 0;
    i_rsp_ptr->data_length[1] = 0;
    G_rsp_status = ERRL_RC_SUCCESS;

    if(strncmp(l_query->comp, "RST", OCC_TRACE_NAME_SIZE) != 0)
    {
        // Ordinary injection: unrecoverable log carrying the traces of the
        // requested component
        l_log = createErrl(CMDH_DBUG_MID,                //modId
                           INTERNAL_FAILURE,             //reasoncode
                           OCC_NO_EXTENDED_RC,           //Extended reason code
                           ERRL_SEV_UNRECOVERABLE,       //Severity
                           TRAC_get_td(l_query->comp),   //Trace Buf
                           DEFAULT_TRACE_SIZE,           //Trace Size
                           0xff,                         //userdata1
                           0);                           //userdata2

        if (INVALID_ERR_HNDL == l_log)
        {
            G_rsp_status = ERRL_RC_INTERNAL_FAIL;
        }

        // Commit Error log
        commitErrl(&l_log);
    }
    else
    {
        // "RST": predictive log with a processor callout, then request a reset
        l_log = createErrl(CMDH_DBUG_MID,                //modId
                           INTERNAL_FAILURE,             //reasoncode
                           OCC_NO_EXTENDED_RC,           //Extended reason code
                           ERRL_SEV_PREDICTIVE,          //Severity
                           NULL,                         //Trace Buf
                           DEFAULT_TRACE_SIZE,           //Trace Size
                           0xff,                         //userdata1
                           0);                           //userdata2

        if (INVALID_ERR_HNDL == l_log)
        {
            G_rsp_status = ERRL_RC_INTERNAL_FAIL;
        }

        addCalloutToErrl(l_log,
                         ERRL_CALLOUT_TYPE_HUID,         //callout type (HUID/CompID)
                         G_sysConfigData.proc_huid,      //callout data
                         ERRL_CALLOUT_PRIORITY_HIGH);    //priority

        REQUEST_RESET(l_log);
    }

    // Trace the outcome of the injection
    if (G_rsp_status != ERRL_RC_INTERNAL_FAIL)
    {
        TRAC_INFO("cmdh_dbug_inject_errl: inject errl for COMP : %s\n", l_query->comp);
    }
    else
    {
        TRAC_ERR("cmdh_dbug_inject_errl: Fail creating ERR Log\n");
    }

    return;
}
예제 #14
0
파일: errlTest.c 프로젝트: deece/occ
// Function Specification
//
// Name: errlTestErrorHandling
//
// Description: Exercises the error log API (createErrl, addTraceToErrl,
//              commitErrl, deleteErrl, addCalloutToErrl, addUsrDtlsToErrl,
//              setErrlSevToInfo) with invalid parameters: NULL,
//              INVALID_ERR_HNDL, zero sizes and handles to already
//              committed logs.
//
//              Returns l_rc, which starts at 0 and is handed to every
//              CHECK_CONDITION invocation.
//              NOTE(review): CHECK_CONDITION is a project macro not visible
//              here; presumably it records the failure in l_rc and breaks
//              out of the do/while - confirm against its definition.
//
// End Function Specification
uint32_t errlTestErrorHandling()
{
    uint32_t l_rc = 0;
    errlHndl_t l_errlHnd = NULL;
    // Scratch buffer handed to addUsrDtlsToErrl; its contents are never set
    uint8_t l_dataPtr[10];
    // Entry size sampled before/after a call that is expected to be rejected
    uint16_t l_entrySizeBefore = 0;
    uint16_t l_entrySizeAfter = 0;

    ERRL_DBG(" START");
    do
    {
        /****************************************************/
        // Test createErrl with incorrect parameter
        // Set ERRL_SEVERITY to 0x04, out of range so log won't be created
        l_errlHnd = createErrl(TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, 0x04, NULL, 0, 0x01, 0x02);
        CHECK_CONDITION( l_errlHnd == INVALID_ERR_HNDL, l_rc);

        /****************************************************/
        // Test addTraceToErrl with incorrect parameter
        // Create a valid log to run the negative tests against
        l_errlHnd = createErrl(TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, NULL, 0, 0x01, 0x02);
        CHECK_CONDITION( l_errlHnd != INVALID_ERR_HNDL, l_rc);

        // i_trace = NULL, so entry size doesn't change
        l_entrySizeBefore = l_errlHnd->iv_userDetails.iv_entrySize;
        addTraceToErrl(NULL, 5, l_errlHnd);
        l_entrySizeAfter = l_errlHnd->iv_userDetails.iv_entrySize;
        CHECK_CONDITION(l_entrySizeBefore == l_entrySizeAfter, l_rc);

        // i_traceSz = 0, entry size doesn't change
        l_entrySizeBefore = l_errlHnd->iv_userDetails.iv_entrySize;
        addTraceToErrl(g_trac_inf, 0, l_errlHnd); // @at012c
        l_entrySizeAfter = l_errlHnd->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_entrySizeBefore == l_entrySizeAfter, l_rc);

        // io_err = NULL, entry size of the existing log doesn't change
        l_entrySizeBefore = l_errlHnd->iv_userDetails.iv_entrySize;
        addTraceToErrl(g_trac_inf, 32, NULL); // @at012c
        l_entrySizeAfter = l_errlHnd->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_entrySizeBefore == l_entrySizeAfter, l_rc);

        // Test addTraceToErrl after the log is committed, so entry size
        // doesn't change. A copy of the handle is kept in l_errlHndx so the
        // committed log can still be inspected and later deleted.
        errlHndl_t l_errlHndx = l_errlHnd;
        commitErrl(&l_errlHnd);
        l_entrySizeBefore = l_errlHndx->iv_userDetails.iv_entrySize;
        addTraceToErrl(g_trac_inf, 32, l_errlHndx); // @at012c
        l_entrySizeAfter = l_errlHndx->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_entrySizeBefore == l_entrySizeAfter, l_rc);

        // Delete the committed log; deleteErrl is expected to NULL the handle
        deleteErrl(&l_errlHndx);
        CHECK_CONDITION( l_errlHndx == NULL, l_rc);

        // io_err = INVALID_ERR_HNDL
        // Make sure the function copes with INVALID_ERR_HNDL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        l_errlHnd = INVALID_ERR_HNDL;
        addTraceToErrl(g_trac_inf, 32, l_errlHnd);

        /****************************************************/
        // Test commitErrl with incorrect parameter
        // io_err = NULL
        // Make sure the function copes with NULL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        commitErrl( NULL);

        // Committing INVALID_ERR_HNDL: l_errlHnd should be set to NULL
        l_errlHnd = INVALID_ERR_HNDL;
        commitErrl(&l_errlHnd);
        CHECK_CONDITION( l_errlHnd == NULL, l_rc);

        /****************************************************/
        // Test deleteErrl with incorrect parameter
        // io_err = NULL
        // Make sure the function copes with NULL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        deleteErrl( NULL);

        // Deleting INVALID_ERR_HNDL: l_errlHnd should be set to NULL
        l_errlHnd = INVALID_ERR_HNDL;
        deleteErrl(&l_errlHnd);
        CHECK_CONDITION( l_errlHnd == NULL, l_rc);

        /****************************************************/
        // Test addCalloutToErrl with incorrect parameter
        // Set io_err to NULL
        // Make sure the function copes with NULL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        addCalloutToErrl(NULL, ERRL_CALLOUT_TYPE_HUID, 0, ERRL_CALLOUT_PRIORITY_LOW);

        // Set io_err to INVALID_ERR_HNDL
        // Make sure the function copes with INVALID_ERR_HNDL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        addCalloutToErrl(INVALID_ERR_HNDL, ERRL_CALLOUT_TYPE_HUID, 0, ERRL_CALLOUT_PRIORITY_LOW);

        /****************************************************/
        // Test addUsrDtlsToErrl with incorrect parameter
        // Create a valid log to run the negative tests against
        l_errlHnd = createErrl(TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, NULL, 0, 0x01, 0x02);
        CHECK_CONDITION( l_errlHnd != INVALID_ERR_HNDL, l_rc);

        // io_err = NULL
        // Make sure the function copes with NULL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        addUsrDtlsToErrl(NULL, l_dataPtr, 10, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA);

        // io_err = INVALID_ERR_HNDL
        // Make sure the function copes with INVALID_ERR_HNDL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        addUsrDtlsToErrl(INVALID_ERR_HNDL, l_dataPtr, 10, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA);

        // i_dataPtr = NULL so entry size doesn't change
        l_entrySizeBefore = l_errlHnd->iv_userDetails.iv_entrySize;
        addUsrDtlsToErrl(l_errlHnd, NULL, 10, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA);
        l_entrySizeAfter = l_errlHnd->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_entrySizeBefore == l_entrySizeAfter, l_rc);

        // i_size = 0 so entry size doesn't change
        l_entrySizeBefore = l_errlHnd->iv_userDetails.iv_entrySize;
        addUsrDtlsToErrl(l_errlHnd, l_dataPtr, 0, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA);
        l_entrySizeAfter = l_errlHnd->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_entrySizeBefore == l_entrySizeAfter, l_rc);

        // Test addUsrDtlsToErrl after the log is committed, so entry size
        // doesn't change. As above, l_errlHndx keeps a copy of the handle
        // across the commit.
        l_errlHndx = l_errlHnd;
        commitErrl(&l_errlHnd);
        l_entrySizeBefore = l_errlHndx->iv_userDetails.iv_entrySize;
        addUsrDtlsToErrl(l_errlHndx, l_dataPtr, 10, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA);
        l_entrySizeAfter = l_errlHndx->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_entrySizeBefore == l_entrySizeAfter, l_rc);

        // Delete the committed log; deleteErrl is expected to NULL the handle
        deleteErrl(&l_errlHndx);
        CHECK_CONDITION( l_errlHndx == NULL, l_rc);

        /****************************************************/
        // Test setErrlSevToInfo with incorrect parameter
        // Set io_err to NULL.
        // Make sure the function copes with NULL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        setErrlSevToInfo(NULL);

        // Set io_err to INVALID_ERR_HNDL
        // Make sure the function copes with INVALID_ERR_HNDL being passed.
        // There is nothing to check afterwards; per the original author, a
        // mishandled pointer would just cause a TLB exception.
        setErrlSevToInfo(INVALID_ERR_HNDL);
    }while(0);

    return l_rc;
}
예제 #15
0
파일: dcom.c 프로젝트: ploetzma/occ
// Function Specification
//
// Name: dcom_error_check
//
// Description: Maintain the failure counter for the slave inbox or slave
//              outbox receive path. When i_clear_error is set the counter
//              is zeroed; otherwise it is incremented and, when it equals
//              one of the DCOM_*_GAP thresholds, a trace is written or an
//              error log is created using the caller-supplied reason code
//              (i_orc) and extended reason code (i_orc_ext).
//
// End Function Specification
void dcom_error_check( const dcom_error_type_t i_error_type, const bool i_clear_error, const uint32_t i_orc, const uint32_t i_orc_ext)
{
    static uint16_t L_rx_slv_outbox_fail_count = 0;

    // SLAVE_INBOX uses the global inbox counter; any other type is treated
    // as the slave outbox and uses the local static counter
    const bool      l_is_inbox = ( i_error_type == SLAVE_INBOX );
    uint16_t        *l_fail_count = l_is_inbox ? &G_dcomSlvInboxCounter.currentFailCount
                                               : &L_rx_slv_outbox_fail_count;
    uint16_t        l_module = l_is_inbox ? DCOM_MID_TASK_RX_SLV_INBOX
                                          : DCOM_MID_TASK_RX_SLV_OUTBOX;

    // Caller reports success: reset the count and leave
    if ( i_clear_error )
    {
        *l_fail_count = 0;
        return;
    }

    // Another failure
    (*l_fail_count)++;

    // First threshold: important trace only
    if ( *l_fail_count == DCOM_250us_GAP )
    {
        TRAC_IMP("l_count_ptr[%d], L_outbox[%d], L_inbox[%d]",
            *l_fail_count,
            L_rx_slv_outbox_fail_count,
            G_dcomSlvInboxCounter.currentFailCount );
        return;
    }

    // Second threshold: commit an unrecoverable log and request nominal
    if ( *l_fail_count == DCOM_4MS_GAP )
    {
        // NOTE: SRC tags are NOT needed here, they are
        //       taken care of by the caller
        errlHndl_t  l_nominal_errl = createErrl(
            l_module,                       //ModId
            i_orc,                          //Reasoncode
            i_orc_ext,                      //Extended reasoncode
            ERRL_SEV_UNRECOVERABLE,         //Severity
            NULL,                           //Trace Buf
            DEFAULT_TRACE_SIZE,             //Trace Size
            *l_fail_count,                  //Userdata1
            0                               //Userdata2
            );

        commitErrl( &l_nominal_errl );

        // Call request nominal macro to change state
        REQUEST_NOMINAL();
        return;
    }

    // Third threshold: hand an unrecoverable log to the reset request
    if ( *l_fail_count == DCOM_1S_GAP )
    {
        // NOTE: SRC tags are NOT needed here, they are
        //       taken care of by the caller
        errlHndl_t  l_reset_errl = createErrl(
            l_module,                       //ModId
            i_orc,                          //Reasoncode
            i_orc_ext,                      //Extended reasoncode
            ERRL_SEV_UNRECOVERABLE,         //Severity
            NULL,                           //Trace Buf
            DEFAULT_TRACE_SIZE,             //Trace Size
            *l_fail_count,                  //Userdata1
            0                               //Userdata2
            );

        // The log is passed to the reset request macro rather than
        // being committed here
        REQUEST_RESET(l_reset_errl);
    }
}
예제 #16
0
// Function Specification
//
// Name: task_centaur_control
//
// Description: Periodic task that checks on the GPE scom request used for
//              centaur control. For the centaur selected by
//              l_centControlTask->curCentaur it:
//                - bails out early if the previous GPE request is still
//                  attached to an async queue (not idle),
//                - if a request had been scheduled, checks whether it
//                  completed without error; on failure it logs one error per
//                  centaur (unless that centaur has a channel checkstop) and
//                  counts the failure in L_scom_timeout[].
//
// NOTE(review): this listing ends inside the do-block (right after the
//               "if(L_gpe_scheduled)" section); the rest of the function is
//               not visible here.
//
// End Function Specification
void task_centaur_control( task_t * i_task )
{
    errlHndl_t            l_err     = NULL;    // Error handler
    int                   rc        = 0;       // Return code
    uint32_t              l_cent;              // Index of the centaur being serviced
    amec_centaur_t        *l_cent_ptr = NULL;  // AMEC data for that centaur
    static uint8_t        L_scom_timeout[MAX_NUM_CENTAURS] = {0}; //track # of consecutive failures
    static bool           L_gpe_scheduled = FALSE;   // Gates the scom status check below
    static uint8_t        L_gpe_fail_logged = 0;     // Bitmask: one bit per centaur already logged
    static bool           L_gpe_idle_traced = FALSE; // "still running" trace already emitted
    static bool           L_gpe_had_1_tick = FALSE;  // Request was already busy on the previous call

    // Pointer to the task data structure
    centaur_control_task_t * l_centControlTask =
            (centaur_control_task_t *) i_task->data_ptr;


    // Pointer to parameter field for GPE request
    GpeScomParms * l_parms =
          (GpeScomParms *)(l_centControlTask->gpe_req.parameter);

    do
    {
        l_cent = l_centControlTask->curCentaur;
        l_cent_ptr = &g_amec->proc[0].memctl[l_cent].centaur;

        //First, check to see if the previous GPE request still running
        //A request is considered idle if it is not attached to any of the
        //asynchronous request queues
        if( !(async_request_is_idle(&l_centControlTask->gpe_req.request)) )
        {
            // A still-busy request counts towards this centaur's timeout too
            L_scom_timeout[l_cent]++;
            //This can happen due to variability in when the task runs
            // Only trace once, and only when the request was also busy on the
            // previous call (L_gpe_had_1_tick)
            if(!L_gpe_idle_traced && L_gpe_had_1_tick)
            {
                TRAC_INFO("task_centaur_control: GPE is still running. cent[%d]", l_cent);
                l_centControlTask->traceThresholdFlags |= CENTAUR_CONTROL_GPE_STILL_RUNNING;
                L_gpe_idle_traced = TRUE;
            }
            L_gpe_had_1_tick = TRUE;
            break;
        }
        else
        {
            //Request is idle
            L_gpe_had_1_tick = FALSE;
            // Pair the earlier "still running" trace with a completion trace
            if(L_gpe_idle_traced)
            {
                TRAC_INFO("task_centaur_control: GPE completed. cent[%d]", l_cent);
                L_gpe_idle_traced = FALSE;
            }
        }

        //check scom status
        if(L_gpe_scheduled)
        {
            // Failure = request did not complete, or the GPE program reported a non-zero rc
            if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc)
            {
                // Log at most one error per centaur (bit l_cent of L_gpe_fail_logged)
                if(!(L_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent)))
                {
                    // Check if the centaur has a channel checkstop. If it does,
                    // then do not log any errors. We also don't want to throttle
                    // a centaur that is in this condition.
                    if(!(cent_chan_checkstop(l_cent)))
                    {
                        L_gpe_fail_logged |= CENTAUR0_PRESENT_MASK >> l_cent;
                        TRAC_ERR("task_centaur_control: gpe_scom_centaur failed. l_cent=%d rc=%x, index=0x%08x", l_cent, l_parms->rc, l_parms->errorIndex);

                        /* @
                         * @errortype
                         * @moduleid    CENT_TASK_CONTROL_MOD
                         * @reasoncode  CENT_SCOM_ERROR
                         * @userdata1   rc - Return code of scom operation
                         * @userdata2   index of scom operation that failed
                         * @userdata4   OCC_NO_EXTENDED_RC
                         * @devdesc     OCC access to centaur failed
                         */
                        l_err = createErrl(
                                CENT_TASK_CONTROL_MOD,                  // modId
                                CENT_SCOM_ERROR,                        // reasoncode
                                OCC_NO_EXTENDED_RC,                     // Extended reason code
                                ERRL_SEV_PREDICTIVE,                    // Severity
                                NULL,                                   // Trace Buf
                                DEFAULT_TRACE_SIZE,                     // Trace Size
                                l_parms->rc,                            // userdata1
                                l_parms->errorIndex                     // userdata2
                                );

                        // Attach the GPE request's FFDC as binary user details
                        addUsrDtlsToErrl(l_err,                                  //io_err
                                (uint8_t *) &(l_centControlTask->gpe_req.ffdc),  //i_dataPtr,
                                sizeof(PoreFfdc),                                //i_size
                                ERRL_USR_DTL_STRUCT_VERSION_1,                   //version
                                ERRL_USR_DTL_BINARY_DATA);                       //type

                        //callout the centaur
                        addCalloutToErrl(l_err,
                                         ERRL_CALLOUT_TYPE_HUID,
                                         G_sysConfigData.centaur_huids[l_cent],
                                         ERRL_CALLOUT_PRIORITY_MED);

                        //callout the processor
                        addCalloutToErrl(l_err,
                                         ERRL_CALLOUT_TYPE_HUID,
                                         G_sysConfigData.proc_huid,
                                         ERRL_CALLOUT_PRIORITY_MED);

                        commitErrl(&l_err);
                    }
                }//if(!(L_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent)))

                //Request failed. Keep count of failures and request a reset if we reach a
                //max retry count
                // NOTE(review): only the break at the threshold is visible here;
                // the reset request itself is presumably issued after the
                // do-block, outside this excerpt.
                L_scom_timeout[l_cent]++;
                if(L_scom_timeout[l_cent] == CENTAUR_CONTROL_SCOM_TIMEOUT)
                {
                    break;
                }

            }//if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc)
            else
            {
                //request completed successfully.  reset the timeout.
                L_scom_timeout[l_cent] = 0;
            }
        }//if(L_gpe_scheduled)
예제 #17
0
파일: amec_pcap.c 프로젝트: open-power/occ
//////////////////////////
// Function Specification
//
// Name: amec_gpu_pcap
//
// Description: Determine power cap for GPUs
//
//              On the first call the total GPU power budgets for the two
//              system power caps are computed once: N mode (oversubscription,
//              from g_amec->pcap.ovs_node_pcap) and N+1 mode (from
//              G_sysConfigData.pcap.system_pcap). Whenever the active power
//              cap or the PSR changes, the GPU budget for the active cap is
//              recomputed, the lower of the system budget and the active
//              budget is selected and divided equally across
//              G_first_num_gpus_sys GPUs. Finally, for every present GPU whose
//              power limits have been read, the per-GPU cap is converted to
//              mW, clipped to the GPU's min/max and stored in
//              gpu_desired_pcap_mw.
//
// Parameters:
//   i_oversubscription    - TRUE selects the N mode budget as the system
//                           limit, FALSE selects the N+1 mode budget
//   i_active_pcap_changed - TRUE forces the active-limit budget to be
//                           recalculated
//   i_avail_power         - not referenced by this function
//
// Thread: Real Time Loop
//
// End Function Specification
void amec_gpu_pcap(bool i_oversubscription, bool i_active_pcap_changed, int32_t i_avail_power)
{
    /*------------------------------------------------------------------------*/
    /*  Local Variables                                                       */
    /*------------------------------------------------------------------------*/
    uint8_t  i = 0;
    uint32_t l_gpu_cap_mw = 0;
    uint16_t l_system_gpu_total_pcap = 0;  // total GPU pcap required by system based on if currently in oversub or not
    static uint16_t L_total_gpu_pcap = 0;  // Current total GPU pcap in effect
    static uint16_t L_n_plus_1_mode_gpu_total_pcap = 0;  // Total GPU pcap required for N+1 (not in oversubscription)
    static uint16_t L_n_mode_gpu_total_pcap = 0;  // Total GPU pcap required for oversubscription
    static uint16_t L_active_psr_gpu_total_pcap = 0; // Total GPU pcap for the currently set pcap and PSR
    static uint16_t L_per_gpu_pcap = 0;  // Amount of L_total_gpu_pcap for each GPU
    static uint8_t L_psr = 100;   // PSR value used in L_active_psr_gpu_total_pcap calculation
    static bool L_first_run = TRUE;  // for calculations done only 1 time

    // Last cap traced per GPU, used to avoid repeating the same trace
    static uint32_t L_last_pcap_traced[MAX_NUM_GPU_PER_DOMAIN] = {0};

    /*------------------------------------------------------------------------*/
    /*  Code                                                                  */
    /*------------------------------------------------------------------------*/
    // If this is the first time running calculate the total GPU power cap for system power caps (N and N+1)
    if(L_first_run)
    {
       // calculate total GPU power cap for oversubscription
       if(g_amec->pcap.ovs_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
       {
           // Take all non-GPU power away from the oversubscription power cap
           L_n_mode_gpu_total_pcap = g_amec->pcap.ovs_node_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
           // Add back in the power that will be dropped by processor DVFS and memory throttling and give to GPUs
           L_n_mode_gpu_total_pcap += G_sysConfigData.total_proc_mem_pwr_drop_watts;
       }
       else
       {
           // This should not happen, the total non GPU power should never be higher than the N mode cap
           // Log error and set GPUs to minimum power cap
           L_n_mode_gpu_total_pcap = 0; // this will set minimum GPU power cap

           TRAC_ERR("amec_gpu_pcap: non GPU max power %dW is more than N mode pwr limit %dW",
                     G_sysConfigData.total_non_gpu_max_pwr_watts, g_amec->pcap.ovs_node_pcap);

           /* @
            * @errortype
            * @moduleid    AMEC_GPU_PCAP_MID
            * @reasoncode  GPU_FAILURE
            * @userdata1   N mode Power Cap watts
            * @userdata2   Total non-GPU power watts
            * @userdata4   ERC_GPU_N_MODE_PCAP_CALC_FAILURE
            * @devdesc     Total non-GPU power more than N mode power cap
            *
            */
           errlHndl_t l_err = createErrl(AMEC_GPU_PCAP_MID,
                                         GPU_FAILURE,
                                         ERC_GPU_N_MODE_PCAP_CALC_FAILURE,
                                         ERRL_SEV_PREDICTIVE,
                                         NULL,
                                         DEFAULT_TRACE_SIZE,
                                         g_amec->pcap.ovs_node_pcap,
                                         G_sysConfigData.total_non_gpu_max_pwr_watts);

           //Callout firmware
           addCalloutToErrl(l_err,
                            ERRL_CALLOUT_TYPE_COMPONENT_ID,
                            ERRL_COMPONENT_ID_FIRMWARE,
                            ERRL_CALLOUT_PRIORITY_HIGH);
           commitErrl(&l_err);
       }

       // calculate total GPU power cap for N+1 (not in oversubscription)
       if(G_sysConfigData.pcap.system_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
       {
           // Take all non-GPU power away from the N+1 power cap
           L_n_plus_1_mode_gpu_total_pcap = G_sysConfigData.pcap.system_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
           // Add back in the power that will be dropped by processor DVFS and memory throttling and give to GPUs
           L_n_plus_1_mode_gpu_total_pcap += G_sysConfigData.total_proc_mem_pwr_drop_watts;
       }
       else
       {
           // This should not happen, the total non GPU power should never be higher than the N+1 mode cap
           // Log error and set GPUs to minimum power cap
           L_n_plus_1_mode_gpu_total_pcap = 0; // this will set minimum GPU power cap

           TRAC_ERR("amec_gpu_pcap: non GPU max power %dW is more than N+1 mode pwr limit %dW",
                     G_sysConfigData.total_non_gpu_max_pwr_watts, G_sysConfigData.pcap.system_pcap);

           /* @
            * @errortype
            * @moduleid    AMEC_GPU_PCAP_MID
            * @reasoncode  GPU_FAILURE
            * @userdata1   N+1 mode Power Cap watts
            * @userdata2   Total non-GPU power watts
            * @userdata4   ERC_GPU_N_PLUS_1_MODE_PCAP_CALC_FAILURE
            * @devdesc     Total non-GPU power more than N+1 mode power cap
            *
            */
           errlHndl_t l_err = createErrl(AMEC_GPU_PCAP_MID,
                                         GPU_FAILURE,
                                         ERC_GPU_N_PLUS_1_MODE_PCAP_CALC_FAILURE,
                                         ERRL_SEV_PREDICTIVE,
                                         NULL,
                                         DEFAULT_TRACE_SIZE,
                                         G_sysConfigData.pcap.system_pcap,
                                         G_sysConfigData.total_non_gpu_max_pwr_watts);

           //Callout firmware
           addCalloutToErrl(l_err,
                            ERRL_CALLOUT_TYPE_COMPONENT_ID,
                            ERRL_COMPONENT_ID_FIRMWARE,
                            ERRL_CALLOUT_PRIORITY_HIGH);
           commitErrl(&l_err);
       }
    }  // if first run

    // Calculate the total GPU power cap for the current active limit and PSR
    // this only needs to be calculated if either the active limit or PSR changed
    if( (L_first_run) || (i_active_pcap_changed) || (L_psr != G_sysConfigData.psr) )
    {
       L_psr = G_sysConfigData.psr;
       if(g_amec->pcap.active_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
       {
           // Take all non-GPU power away from the active power cap
           L_active_psr_gpu_total_pcap = g_amec->pcap.active_node_pcap - G_sysConfigData.total_non_gpu_max_pwr_watts;
           // Add back in the power that will be dropped by processor DVFS and memory throttling based on the PSR
           // to give to GPUs.
           // The PSR is a percentage, so multiply first and divide by 100 last:
           // dividing first (L_psr / 100) truncates to 0 for every PSR below
           // 100 and would give the GPUs none of the dropped power.
           L_active_psr_gpu_total_pcap +=
               ( ((uint32_t)L_psr * G_sysConfigData.total_proc_mem_pwr_drop_watts) / 100 );
       }
       else
       {
           // Set GPUs to minimum power cap
           L_active_psr_gpu_total_pcap = 0;
           TRAC_IMP("amec_gpu_pcap: non GPU max power %dW is more than active pwr limit %dW",
                     G_sysConfigData.total_non_gpu_max_pwr_watts, g_amec->pcap.active_node_pcap);
       }

       // Total GPU power cap is the lower of system (N+1 or oversubscription depending on if in oversub)
       // and the active power limit.  We do not need to always account for oversubscription since
       // the automatic hw power brake will assert to the GPUs if there is a problem when oversub is
       // entered from the time OCC can set and GPUs react to a new power limit
       if(i_oversubscription)
       {
          // system in oversubscription use N mode cap
          l_system_gpu_total_pcap = L_n_mode_gpu_total_pcap;
       }
       else
       {
          // system is not in oversubscription use N+1 mode cap
          l_system_gpu_total_pcap = L_n_plus_1_mode_gpu_total_pcap;
       }

       L_total_gpu_pcap = (l_system_gpu_total_pcap < L_active_psr_gpu_total_pcap) ?
                           l_system_gpu_total_pcap : L_active_psr_gpu_total_pcap;

       // Divide the total equally across all GPUs in the system
       if(G_first_num_gpus_sys)
       {
          L_per_gpu_pcap = L_total_gpu_pcap / G_first_num_gpus_sys;
       }
       else
       {
           // Avoid dividing by zero; a zero cap is clipped up to each GPU's minimum below
           L_per_gpu_pcap = 0;
           TRAC_ERR("amec_gpu_pcap: Called with no GPUs present!");
       }
    }

    // Setup to send new power limit to GPUs. The actual sending of GPU power limit will be handled by task_gpu_sm()
    for (i=0; i<MAX_NUM_GPU_PER_DOMAIN; i++)
    {
        // Before sending a GPU a power limit the power limits must be read from the GPU to know min/max GPU allows
        if( GPU_PRESENT(i) && g_amec->gpu[i].pcap.pwr_limits_read )
        {
           l_gpu_cap_mw = L_per_gpu_pcap * 1000;  // convert W to mW

           // GPU is present and have min/max power limits from GPU
           // clip the GPU power limit to min/max GPU limit if needed
           if(l_gpu_cap_mw < g_amec->gpu[i].pcap.gpu_min_pcap_mw)  // clip to min?
           {
              l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_min_pcap_mw;
           }
           else if(l_gpu_cap_mw > g_amec->gpu[i].pcap.gpu_max_pcap_mw)  // clip to max?
           {
              l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_max_pcap_mw;
           }

           // check if this is a new power limit
           if(g_amec->gpu[i].pcap.gpu_desired_pcap_mw != l_gpu_cap_mw)
           {
              // Trace the change unless the desired cap is currently 0 and this
              // same value was already the last one traced for this GPU
              if( (g_amec->gpu[i].pcap.gpu_desired_pcap_mw != 0) ||
                  (L_last_pcap_traced[i] != l_gpu_cap_mw) )
              {
                 L_last_pcap_traced[i] = l_gpu_cap_mw;
                 TRAC_IMP("amec_gpu_pcap: Updating GPU%d desired pcap %dmW to %dmW", i,
                          g_amec->gpu[i].pcap.gpu_desired_pcap_mw, l_gpu_cap_mw);

              }

              g_amec->gpu[i].pcap.gpu_desired_pcap_mw = l_gpu_cap_mw;
           }
        }
    }  // for each GPU

    L_first_run = FALSE;
}
예제 #18
0
파일: errlTest.c 프로젝트: deece/occ
// Function Specification
//
// Name: errlTestAddTraceToErrl
//
// Description: Exercises the size limiting of addTraceToErrl().
//              Part 1 creates a log without trace and adds MAX_BUFFER_SIZE
//              bytes of trace, then checks the entry size does not exceed
//              MAX_ERRL_ENTRY_SZ.
//              Part 2 creates a log with 512 bytes of trace and adds 256, 512
//              and 1024 more bytes in turn, checking the entry size growth
//              after each call.
//
// Returns: l_rc, which starts at 0 and is only modified through
//          CHECK_CONDITION (macro defined outside this function; presumably
//          it records the failure and leaves the do-block).
//
// End Function Specification
uint32_t errlTestAddTraceToErrl()
{
    uint32_t l_rc = 0;
    uint16_t l_entrySizeBefore = 0;
    uint16_t l_entrySizeAfter = 0;
    ERRL_DBG("START");

    do
    {
        // Create one err log
        errlHndl_t l_handle = NULL;
        l_handle = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, NULL, 512, 0x1, 0x2);
        CHECK_CONDITION( l_handle != INVALID_ERR_HNDL, l_rc);

        // l_handle will set to NULL after calling the commitErrl, so we need to store it
        errlHndl_t l_handleX = l_handle;
        ERRL_DBG("Slots after Create - 1 slots should be used (one of each");
        ppdumpslot();

        /****************************************************/
        // Test size limit for addTraceToErrl
        // Add "trace" data that exceeds the max size
        l_entrySizeBefore = l_handle->iv_userDetails.iv_entrySize;
        addTraceToErrl(g_trac_inf, MAX_BUFFER_SIZE, l_handle);
        l_entrySizeAfter = l_handle->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_entrySizeAfter <= MAX_ERRL_ENTRY_SZ, l_rc);

        dumpLog( l_handle, l_handle->iv_userDetails.iv_entrySize );
        commitErrl( &l_handle );
        ERRL_DBG("Slots after Commit -  1 slots should be used/committed");
        ppdumpslot();

        // Delete through the saved copy, since commitErrl cleared l_handle
        deleteErrl(&l_handleX);
        ERRL_DBG("Slots after delete Log - All slots should be empty");
        ppdumpslot();

        /****************************************************/
        // Test size limit for addTraceToErrl with continuous calls
        // Create log with 512 bytes trace
        l_handle = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, g_trac_inf, 512, 0x1, 0x2);
        CHECK_CONDITION( l_handle != INVALID_ERR_HNDL, l_rc);

        // l_handle will set to NULL after calling the commitErrl, so we need to store it
        l_handleX = l_handle;
        ppdumpslot();

        // Add 256 bytes of trace (512+256)
        l_entrySizeBefore = l_handle->iv_userDetails.iv_entrySize;
        addTraceToErrl(g_trac_inf, 256, l_handle); // @at012c
        l_entrySizeAfter = l_handle->iv_userDetails.iv_entrySize;
        ERRL_DBG("Slots after create + 256 bytes" );
        ppdumpslot();
        // (header + 256) is the size that add to entry
        CHECK_CONDITION( l_entrySizeAfter <= (l_entrySizeBefore+sizeof(ErrlUserDetailsEntry_t)+256), l_rc);

        // Add 512 bytes of trace (512+256+512)
        l_entrySizeBefore = l_handle->iv_userDetails.iv_entrySize;
        addTraceToErrl(g_trac_inf, 512, l_handle); // @at012c
        l_entrySizeAfter = l_handle->iv_userDetails.iv_entrySize;
        ERRL_DBG("Slots after create + 256 + 512 bytes");
        ppdumpslot();
        // (header + 512) is the most that can be added to the entry
        CHECK_CONDITION( l_entrySizeAfter <= (l_entrySizeBefore+sizeof(ErrlUserDetailsEntry_t)+512), l_rc);

        // Add 1024 bytes of trace (512+256+512+1024), the entry size is more than 2048 now
        l_entrySizeBefore = l_handle->iv_userDetails.iv_entrySize;
        addTraceToErrl(g_trac_inf, 1024, l_handle); // @at012c
        l_entrySizeAfter = l_handle->iv_userDetails.iv_entrySize;
        // Message names this step (it used to repeat the previous step's text)
        ERRL_DBG("Slots after create + 256 + 512 + 1024 bytes");
        ppdumpslot();
        // The request no longer fits, so only the overall cap is checked
        CHECK_CONDITION( l_entrySizeAfter <= MAX_ERRL_ENTRY_SZ, l_rc);

        commitErrl( &l_handle );
        deleteErrl(&l_handleX);
        ERRL_DBG("Slots should now be empty");
        ppdumpslot();
        ERRL_DBG("END \n");

    }while(0);

    return l_rc;
}
예제 #19
0
파일: errlTest.c 프로젝트: deece/occ
// Function Specification
//
// Name: errlTestAddUsrDtlsToErrl
//
// Description: Exercises the size limiting of addUsrDtlsToErrl().
//              Part 1 creates three logs (unrecoverable, call-home data,
//              informational), adds user details to each and checks the
//              resulting entry sizes.
//              Part 2 creates a log with 512 bytes of trace and adds 256, 512
//              and 1024 bytes of user details in turn, checking the entry
//              size after each call.
//
// Returns: l_rc, which starts at 0 and is only modified through
//          CHECK_CONDITION (macro defined outside this function).
//
// End Function Specification
uint32_t errlTestAddUsrDtlsToErrl()
{
    uint32_t l_rc = 0;
    uint16_t l_sizePrior = 0;   // entry size sampled before an add
    uint16_t l_sizePost = 0;    // entry size sampled after an add

    ERRL_DBG("START");

    do
    {
        // Three logs, each created with a different severity
        errlHndl_t l_logA = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_UNRECOVERABLE, NULL, 512, 0x1, 0x2);
        errlHndl_t l_logB = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_CALLHOME_DATA, NULL, 512, 0x1, 0x2);
        errlHndl_t l_logC = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_INFORMATIONAL, NULL, 512, 0x1, 0x2);

        // commitErrl clears the handle it is given, so keep a second copy of
        // each one for the later deleteErrl calls
        errlHndl_t l_savedA = l_logA;
        errlHndl_t l_savedB = l_logB;
        errlHndl_t l_savedC = l_logC;
        ERRL_DBG("Slots after Create - 3 slots should be used (one of each");
        ppdumpslot();

        CHECK_CONDITION( (l_logA != INVALID_ERR_HNDL) &&
                         (l_logB != INVALID_ERR_HNDL) &&
                         (l_logC != INVALID_ERR_HNDL), l_rc);

        /****************************************************/
        // Size limit checks for a single addUsrDtlsToErrl call

        // Log A: hand in all of G_data; the entry must end up at the maximum entry size
        l_sizePrior = l_logA->iv_userDetails.iv_entrySize;
        memset( G_data, 0xCC, sizeof( G_data ) );
        addUsrDtlsToErrl( l_logA, G_data, sizeof( G_data ), ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA );
        l_sizePost = l_logA->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_sizePost == MAX_ERRL_ENTRY_SZ, l_rc);

        // Log B: same, but the expected size is the call-home maximum
        l_sizePrior = l_logB->iv_userDetails.iv_entrySize;
        memset( G_data, 0xDD, sizeof( G_data ) );
        addUsrDtlsToErrl( l_logB, G_data, sizeof( G_data ), ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_CALLHOME_DATA );
        l_sizePost = l_logB->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_sizePost == MAX_ERRL_CALL_HOME_SZ, l_rc);

        // Log C: 76 bytes fit, so the entry grows by exactly header + 76
        l_sizePrior = l_logC->iv_userDetails.iv_entrySize;
        memset( G_data, 0xEE, sizeof( G_data ) );
        addUsrDtlsToErrl( l_logC, G_data, 76, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA );
        l_sizePost = l_logC->iv_userDetails.iv_entrySize;
        CHECK_CONDITION( l_sizePost == (l_sizePrior+sizeof(ErrlUserDetailsEntry_t)+76), l_rc);

        dumpLog( l_logA, l_logA->iv_userDetails.iv_entrySize );
        dumpLog( l_logB, l_logB->iv_userDetails.iv_entrySize );
        dumpLog( l_logC, l_logC->iv_userDetails.iv_entrySize );

        commitErrl( &l_logA );
        commitErrl( &l_logB );
        commitErrl( &l_logC );
        ERRL_DBG("Slots after Commit -  3 slots should be used/committed");
        ppdumpslot();

        // Release the slots through the saved copies; deleteErrl is expected
        // to clear each handle it is given
        deleteErrl(&l_savedA);
        deleteErrl(&l_savedB);
        deleteErrl(&l_savedC);
        CHECK_CONDITION( (l_savedA == NULL) &&
                         (l_savedB == NULL) &&
                         (l_savedC == NULL), l_rc);

        ERRL_DBG("Slots after delete Log - All slots should be empty");
        ppdumpslot();

        /****************************************************/
        // Size limit checks for repeated addUsrDtlsToErrl calls on one log
        // that already carries 512 bytes of trace
        l_logA = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, g_trac_inf, 512, 0x1, 0x2);
        CHECK_CONDITION( l_logA != INVALID_ERR_HNDL, l_rc);

        // Keep a copy again for the final deleteErrl
        l_savedA = l_logA;
        ppdumpslot();

        // 512 + 256: entry grows by exactly header + 256
        l_sizePrior = l_logA->iv_userDetails.iv_entrySize;
        memset( G_data, 0xAA, sizeof( G_data ) );
        addUsrDtlsToErrl( l_logA, G_data, 256, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA );
        l_sizePost = l_logA->iv_userDetails.iv_entrySize;
        ERRL_DBG("Slots after create + 256 bytes" );
        ppdumpslot();
        CHECK_CONDITION( l_sizePost == (l_sizePrior+sizeof(ErrlUserDetailsEntry_t)+256), l_rc);

        // 512 + 256 + 512: entry grows by exactly header + 512
        l_sizePrior = l_logA->iv_userDetails.iv_entrySize;
        memset( G_data, 0xBB, sizeof( G_data ) );
        addUsrDtlsToErrl( l_logA, G_data, 512, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA );
        l_sizePost = l_logA->iv_userDetails.iv_entrySize;
        ERRL_DBG("Slots after create + 256 + 512 bytes");
        ppdumpslot();
        CHECK_CONDITION( l_sizePost == (l_sizePrior+sizeof(ErrlUserDetailsEntry_t)+512), l_rc);

        // 512 + 256 + 512 + 1024 is more than 2048, so only the overall cap
        // on the entry size is checked
        l_sizePrior = l_logA->iv_userDetails.iv_entrySize;
        memset( G_data, 0xCC, sizeof( G_data ) );
        addUsrDtlsToErrl( l_logA, G_data, 1024, ERRL_USR_DTL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA );
        l_sizePost = l_logA->iv_userDetails.iv_entrySize;
        ERRL_DBG("Slots after create + 256 + 512 +1024 bytes");
        ppdumpslot();
        CHECK_CONDITION( l_sizePost <= MAX_ERRL_ENTRY_SZ, l_rc); // @at012c

        commitErrl( &l_logA );
        deleteErrl(&l_savedA);
        ERRL_DBG("Slots should now be empty");
        ppdumpslot();
        ERRL_DBG("END \n");
    }while(0);

    return l_rc;
}
예제 #20
0
// Function Specification
//
// Name: amec_update_vrm_sensors
//
// Description: Refreshes the sensors fed by VRM data
// (e.g., VR_FAN, FANS_FULL_SPEED, VR_HOT).
// On the master OCC the regulator state is read via vrm_read_state(); a
// successful read updates the processor VR_FAN and VR_HOT sensors, a failed
// read is counted and logged once the count reaches
// g_amec->proc[0].vrfan_error_count. The memory VR_FAN and VR_HOT sensors
// are always updated with 0.
//
// Thread: RealTime Loop
//
// End Function Specification
void amec_update_vrm_sensors(void)
{
    /*------------------------------------------------------------------------*/
    /*  Local Variables                                                       */
    /*------------------------------------------------------------------------*/
    // Number of consecutive failed VRM reads; saturates at 0xFF
    static uint8_t              L_error_count = 0;

    /*------------------------------------------------------------------------*/
    /*  Code                                                                  */
    /*------------------------------------------------------------------------*/

    // In DCMs only the Master OCC has access to the SPIVID
    if (OCC_DCM_MASTER == G_dcm_occ_role)
    {
        int                     l_read_rc = 0;
        int                     l_vrfan = 0;
        int                     l_softoc = 0;
        int                     l_np1_regmode = 0;
        int                     l_n_regmode = 0;
        uint8_t                 l_fans_pin = 0;
        uint8_t                 l_fans_pin_value = 1; // active low, so default to high
        uint8_t                 l_vrhot_count = 0;
        errlHndl_t              l_err = NULL;

        // VR_FAN and SOFT_OC come from SPIVID
        l_read_rc = vrm_read_state(SPIVRM_PORT(0),
                                   &l_np1_regmode,
                                   &l_n_regmode,
                                   &l_vrfan,
                                   &l_softoc);

        if (l_read_rc != 0)
        {
            // Count this failure, holding at 0xFF so the counter never wraps
            if (L_error_count != 0xFF)
            {
                L_error_count++;
            }

            // Log exactly when the configured threshold is reached; a
            // threshold of 0xFF never logs
            if ((g_amec->proc[0].vrfan_error_count != 0xFF) &&
                (g_amec->proc[0].vrfan_error_count == L_error_count))
            {
                TRAC_ERR("amec_update_vrm_sensors: Failed to read VR_FAN for %u consecutive times!",
                         L_error_count);

                // Tell the thermal thread to send a cooling request
                G_thrm_fru_data[DATA_FRU_VRM].read_failure = 1;

                // NOTE(review): userdata1 below is the DIMM thermal timeout,
                // while the threshold tested above is
                // proc[0].vrfan_error_count -- confirm this is intended.
                /* @
                 * @errortype
                 * @moduleid    AMEC_HEALTH_CHECK_VRFAN_TIMEOUT
                 * @reasoncode  VRM_VRFAN_TIMEOUT
                 * @userdata1   timeout value
                 * @userdata2   0
                 * @userdata4   OCC_NO_EXTENDED_RC
                 * @devdesc     Failed to read VR_FAN signal from regulator.
                 *
                 */
                l_err = createErrl(AMEC_HEALTH_CHECK_VRFAN_TIMEOUT,  //modId
                                   VRM_VRFAN_TIMEOUT,                //reasoncode
                                   OCC_NO_EXTENDED_RC,               //Extended reason code
                                   ERRL_SEV_PREDICTIVE,              //Severity
                                   NULL,                             //Trace Buf
                                   DEFAULT_TRACE_SIZE,               //Trace Size
                                   g_amec->thermaldimm.temp_timeout, //userdata1
                                   0);                               //userdata2

                // The backplane is called out for this VRM error
                addCalloutToErrl(l_err,
                                 ERRL_CALLOUT_TYPE_HUID,
                                 G_sysConfigData.backplane_huid,
                                 ERRL_CALLOUT_PRIORITY_MED);

                commitErrl(&l_err);
            }
        }
        else
        {
            // Publish the VR_FAN reading
            sensor_update( AMECSENSOR_PTR(VRFAN250USPROC), (uint16_t)l_vrfan );

            // The read worked: restart the failure count and drop the
            // 'read failure' flag
            L_error_count = 0;
            G_thrm_fru_data[DATA_FRU_VRM].read_failure = 0;

            // 'fans_full_speed' GPIO pin number from the APSS map. The pin is
            // no longer read from the APSS in GA1 (instability in APSS
            // composite mode), so l_fans_pin_value keeps its default of 1.
            l_fans_pin = G_sysConfigData.apss_gpio_map.fans_full_speed;
            (void)l_fans_pin;

            // VR_HOT counts how many times the VRHOT signal has been asserted
            l_vrhot_count = AMECSENSOR_PTR(VRHOT250USPROC)->sample;

            // The counter only advances while VR_FAN is asserted AND the
            // active-low 'fans_full_speed' GPIO is ON; otherwise it restarts
            if (!(AMECSENSOR_PTR(VRFAN250USPROC)->sample) || l_fans_pin_value)
            {
                l_vrhot_count = 0;
            }
            else if (l_vrhot_count < g_amec->vrhotproc.setpoint)
            {
                l_vrhot_count++;
            }
            sensor_update(AMECSENSOR_PTR(VRHOT250USPROC), l_vrhot_count);
        }
    }

    // The memory VR_FAN / VR_HOT sensors are always reported as 0
    sensor_update( AMECSENSOR_PTR(VRFAN250USMEM), 0 );
    sensor_update( AMECSENSOR_PTR(VRHOT250USMEM), 0 );
}
예제 #21
0
파일: errlTest.c 프로젝트: deece/occ
// Function Specification
//
// Name: errlTestCallouts
//
// Description: Exercises addCalloutToErrl() in three scenarios:
//              1. Add ERRL_MAX_CALLOUTS callouts to a predictive log, then
//                 one more that is expected to be rejected.
//              2. Try to add a callout to a log after it has been committed.
//              3. Try to add a callout to an ERRL_SEV_INFORMATIONAL log,
//                 which is expected to be rejected.
//
// Returns: l_rc, which is initialised to 0 and is only handed to
//          CHECK_CONDITION for updating.
//
// End Function Specification
uint32_t errlTestCallouts()
{
    uint32_t l_rc = 0;
    ERRL_DBG("START");

    do
    {
        errlHndl_t l_handle = NULL;
        ERRL_DBG("--------------------------------\n");

        /****************************************************/
        // Check max callouts
        l_handle = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE,g_trac_inf, 128, 0x1, 0x2);
        CHECK_CONDITION( l_handle != INVALID_ERR_HNDL, l_rc);

        // Priority / type tables indexed by callout number. Both hold 8
        // entries; the loop below is bounded by ERRL_MAX_CALLOUTS, so this
        // assumes ERRL_MAX_CALLOUTS <= 8 -- TODO confirm.
        ERRL_CALLOUT_PRIORITY l_array[8] = {
                                             ERRL_CALLOUT_PRIORITY_HIGH,
                                             ERRL_CALLOUT_PRIORITY_MED,
                                             ERRL_CALLOUT_PRIORITY_LOW,
                                             ERRL_CALLOUT_PRIORITY_HIGH,
                                             ERRL_CALLOUT_PRIORITY_MED,
                                             ERRL_CALLOUT_PRIORITY_MED,
                                             ERRL_CALLOUT_PRIORITY_LOW,
                                             ERRL_CALLOUT_PRIORITY_LOW,
                                            };

        ERRL_CALLOUT_TYPE l_type[8] = {
                                        ERRL_CALLOUT_TYPE_HUID,
                                        ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                        ERRL_CALLOUT_TYPE_HUID,
                                        ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                        ERRL_CALLOUT_TYPE_HUID,
                                        ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                        ERRL_CALLOUT_TYPE_HUID,
                                        ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                       };

        // add ERRL_MAX_CALLOUTS callouts, using the loop index as the
        // callout value
        uint8_t l_index = 0;
        for(l_index = 0; l_index < ERRL_MAX_CALLOUTS; l_index++)
        {
            ERRL_DBG("current callouts %d attempting to add callout # %d with type %d ,priority %d", l_handle->iv_numCallouts, l_index, l_type[l_index], l_array[l_index] );
            addCalloutToErrl(l_handle,l_type[l_index],l_index,l_array[l_index]);
        }
        CHECK_CONDITION( l_handle->iv_numCallouts == ERRL_MAX_CALLOUTS, l_rc);

        // add one more callout and it should fail
        // (l_index equals ERRL_MAX_CALLOUTS here, after the loop)
        addCalloutToErrl(l_handle,l_type[0],l_index,l_array[0]);
        CHECK_CONDITION( l_handle->iv_numCallouts == ERRL_MAX_CALLOUTS, l_rc);

        dumpLog( l_handle, l_handle->iv_userDetails.iv_entrySize );
        deleteErrl( &l_handle );
        ppdumpslot();

        /****************************************************/
        // Check callouts after errl is committed
        // Create log
        l_handle = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE,g_trac_inf, 32, 0x1, 0x2);
        // Keep a second reference: commitErrl() receives the address of
        // l_handle and may modify it, while l_log is still needed for the
        // check and for deleteErrl() below.
        errlHndl_t l_log = l_handle;
        CHECK_CONDITION( l_handle != INVALID_ERR_HNDL, l_rc);

        // Commit log and add callout. But adding callout should fail
        commitErrl( &l_handle );
        addCalloutToErrl(l_handle,l_type[0],0,l_array[0]);
        // NOTE(review): no callout was added to this log before the commit,
        // yet the expected count is ERRL_MAX_CALLOUTS rather than 0. Left
        // unchanged because createErrl()/commitErrl() are not visible here --
        // confirm the intended value.
        CHECK_CONDITION( l_log->iv_numCallouts == ERRL_MAX_CALLOUTS, l_rc);

        deleteErrl(&l_log);

        /****************************************************/
        // Check addCalloutToErrl for ERRL_SEV_INFORMATIONAL log
        // Create ERRL_SEV_INFORMATIONAL log
        l_handle = createErrl( TEST_MODULE_ID, 0x08, OCC_NO_EXTENDED_RC, ERRL_SEV_INFORMATIONAL,g_trac_inf, 128, 0x1, 0x2);
        CHECK_CONDITION( l_handle != INVALID_ERR_HNDL, l_rc);

        // add one callout and it should fail
        // This call must be unconditional: a body-less
        // "if(l_handle == INVALID_ERR_HNDL)" used to sit directly above it
        // and swallowed the call, so the callout was never attempted on a
        // valid handle and the check below passed without testing anything.
        addCalloutToErrl(l_handle,l_type[0],l_index,l_array[0]);
        CHECK_CONDITION( l_handle->iv_numCallouts == 0, l_rc);

        dumpLog( l_handle, l_handle->iv_userDetails.iv_entrySize );
        deleteErrl( &l_handle );
        ppdumpslot();

        ERRL_DBG("END \n");
    }while(0);

    return l_rc;
}
예제 #22
0
파일: reset.c 프로젝트: code-hippo/occ
// Function Specification
//
// Name: task_check_for_checkstop
//
// Description: Reads PORE_GPE0_STATUS and OCB_OISR0. The first time either
//              the GPE0 freeze flag or the checkstop flag is set, the
//              condition is traced and an informational error log is
//              created and committed. After that the check is skipped on
//              every later call.
//
// End Function Specification
void task_check_for_checkstop(task_t *i_self)
{
    // Latched once a frozen GPE0 or checkstop has been reported
    static bool L_checkstop_traced = FALSE;

    pore_status_t l_gpe0;
    ocb_oisr0_t   l_oisr0;
    uint8_t       l_rcode = 0;
    errlHndl_t    l_errl  = NULL;

    // Nothing more to do once the condition has been reported
    if (L_checkstop_traced)
    {
        return;
    }

    // Sample the GPE0 status first, then the interrupt status register
    l_gpe0.value  = in64(PORE_GPE0_STATUS);
    l_oisr0.value = in32(OCB_OISR0);

    // Neither flag is set: leave quietly
    if (!l_gpe0.fields.freeze_action && !l_oisr0.fields.check_stop)
    {
        return;
    }

    // Trace every flag that is set. When both are set the checkstop
    // reason code is the one that ends up in the log.
    if (l_gpe0.fields.freeze_action)
    {
        TRAC_IMP("Frozen GPE0 detected by RTL");
        l_rcode = OCC_GPE_HALTED;
    }
    if (l_oisr0.fields.check_stop)
    {
        TRAC_IMP("System checkstop detected by RTL");
        l_rcode = OCC_SYSTEM_HALTED;
    }

    L_checkstop_traced = TRUE;

    /*
     * @errortype
     * @moduleid    MAIN_SYSTEM_HALTED_MID
     * @reasoncode  OCC_GPE_HALTED
     * @userdata1   High order word of PORE_GPE0_STATUS
     * @userdata2   OCB_OISR0
     * @devdesc     OCC detected frozen GPE0
     */
    /*
     * @errortype
     * @moduleid    MAIN_SYSTEM_HALTED_MID
     * @reasoncode  OCC_SYSTEM_HALTED
     * @userdata1   High order word of PORE_GPE0_STATUS
     * @userdata2   OCB_OISR0
     * @devdesc     OCC detected system checkstop
     */
    l_errl = createErrl(MAIN_SYSTEM_HALTED_MID,
                        l_rcode,
                        OCC_NO_EXTENDED_RC,
                        ERRL_SEV_INFORMATIONAL,
                        NULL,
                        DEFAULT_TRACE_SIZE,
                        l_gpe0.words.high_order,
                        l_oisr0.value);

    // Hand the log to the commit code, which is expected to react to the
    // frozen GPE0 / checkstop conditions itself.
    commitErrl(&l_errl);
}
예제 #23
0
파일: proc_data.c 프로젝트: code-hippo/occ
// Function Specification
//
// Name: task_core_data
//
// Description: Task that handles the bulk core data GPE request described
//              by i_task->data_ptr. In the portion visible here it:
//                - bails out (tracing once) if the previous GPE request is
//                  not idle,
//                - bails out if all assigned cores were already collected
//                  and the current tick is not tick 0,
//                - when the previous request completed for a present core,
//                  swaps the task's data buffer with the global one, marks
//                  the core as updated and traces/logs empath collection
//                  errors flagged in the OHA status register image.
//
// NOTE(review): this excerpt is truncated; the end of the do-block and of the
//               function is not visible, so anything after the empath error
//               handling is not described here.
//
// End Function Specification
void task_core_data( task_t * i_task )
{

    errlHndl_t  l_err = NULL;       //Error handler
    // l_trace, rc and l_parms are not referenced in the visible portion of
    // this function; presumably they are used further down -- TODO confirm.
    tracDesc_t  l_trace = NULL;     //Temporary trace descriptor
    int         rc = 0;     //return code
    bulk_core_data_task_t * l_bulk_core_data_ptr = (bulk_core_data_task_t *)i_task->data_ptr;
    GpeGetCoreDataParms * l_parms = (GpeGetCoreDataParms *)(l_bulk_core_data_ptr->gpe_req.parameter);
    gpe_bulk_core_data_t  * l_temp = NULL;

    do
    {
        //First, check to see if the previous GPE request still running
        //A request is considered idle if it is not attached to any of the
        //asynchronous request queues
        if( !(async_request_is_idle(&l_bulk_core_data_ptr->gpe_req.request)) )
        {
            //This should not happen unless there's a problem
            //Trace 1 time
            if( !G_queue_not_idle_traced )
            {
                TRAC_ERR("Core data GPE is still running \n");
                G_queue_not_idle_traced = TRUE;
            }
            break;
        }

        //Need to complete collecting data for all assigned cores from previous interval
        //and tick 0 is the current tick before collect data again.
        // The mask test acts as "tick modulo MAX_NUM_TICKS" only if
        // MAX_NUM_TICKS is a power of two -- TODO confirm.
        if( (l_bulk_core_data_ptr->current_core == l_bulk_core_data_ptr->end_core)
            &&
            ((CURRENT_TICK & (MAX_NUM_TICKS - 1)) != 0) )
        {
            PROC_DBG("Not collect data. Need to wait for tick.\n");
            break;
        }

        //Check to see if the previously GPE request has successfully completed
        //A request is not considered complete until both the engine job
        //has finished without error and any callback has run to completion.

        if( async_request_completed(&l_bulk_core_data_ptr->gpe_req.request)
            &&
            CORE_PRESENT(l_bulk_core_data_ptr->current_core) )
        {
            //If the previous GPE request succeeded then swap core_data_ptr
            //with the global one. The gpe routine will write new data into
            //a buffer that is not being accessed by the RTLoop code.

            PROC_DBG( "Swap core_data_ptr [%x] with the global one\n",
                     l_bulk_core_data_ptr->current_core );

            //debug only
#ifdef PROC_DEBUG
            print_core_status(l_bulk_core_data_ptr->current_core);
            print_core_data_sensors(l_bulk_core_data_ptr->current_core);
#endif

            // Three-step pointer swap between the task's buffer and the
            // global buffer for the current core.
            l_temp = l_bulk_core_data_ptr->core_data_ptr;
            l_bulk_core_data_ptr->core_data_ptr =
                    G_core_data_ptrs[l_bulk_core_data_ptr->current_core];
            G_core_data_ptrs[l_bulk_core_data_ptr->current_core] = l_temp;

            //Core data has been collected so set the bit in global mask.
            //AMEC code will know which cores to update sensors for. AMEC is
            //responsible for clearing the bit later on.
            G_updated_core_mask |= CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core);

            // Presumptively clear the empath error mask
            G_empath_error_core_mask &=
                    ~(CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core));

            // The gpe_data collection code has to handle the workaround for
            // HW280375.  Two new flags have been added to the OHA_RO_STATUS_REG
            // image to indicate whether the EMPATH collection failed, and
            // whether it was due to an "expected" error that we can ignore
            // (we can ignore the data as well), or an "unexpected" error that
            // we will create an informational log one time.
            //
            // The "expected" errors are very rare in practice, in fact we may
            // never even see them unless running a specific type of workload.
            // If you want to test the handling of expected errors compile the
            // GPE code with -DINJECT_HW280375_ERRORS which will inject an error
            // approximately every 1024 samples
            //
            // To determine if the expected error has occurred inspect the
            // CoreDataOha element of the CoreData structure written by the GPE
            // core data job.  The OHA element contains the oha_ro_status_reg.
            // Inside the OHA status register is a 16 bit reserved field.
            // gpe_data.h defines two masks that can be applied against the
            // reserved field to check for these errors:
            // CORE_DATA_EXPECTED_EMPATH_ERROR
            // CORE_DATA_UNEXPECTED_EMPATH_ERROR
            // Also, a 4-bit PCB parity + error code is saved at bit position:
            // CORE_DATA_EMPATH_ERROR_LOCATION, formally the length is
            // specified by: CORE_DATA_EMPATH_ERROR_BITS
            // Note: this reads the global pointer after the swap above, i.e.
            // the buffer that was owned by the task before the swap.
            gpe_bulk_core_data_t *l_core_data =
                    G_core_data_ptrs[l_bulk_core_data_ptr->current_core];

            // We will trace the errors, but only a certain number of
            // times, we will only log the unexpected error once.
#define OCC_EMPATH_ERROR_THRESH 10
            // Static counters persist across calls and are shared by all
            // cores handled through this function.
            static uint32_t L_expected_emp_err_cnt = 0;
            static uint32_t L_unexpected_emp_err_cnt = 0;

            // Check the reserved field for the expected or the unexpected error flag
            if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_EXPECTED_EMPATH_ERROR)
                ||
                (l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_UNEXPECTED_EMPATH_ERROR))
            {
                // Indicate empath error on current core
                G_empath_error_core_mask |=
                        CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core);

                // Save the high and low order words of the OHA status reg
                uint32_t l_oha_reg_high = l_core_data->oha.oha_ro_status_reg.words.high_order;
                uint32_t l_oha_reg_low = l_core_data->oha.oha_ro_status_reg.words.low_order;

                // Handle each error case
                // The two cases below are independent ifs, so both run when
                // both flags are set. Each stops tracing once its counter
                // reaches OCC_EMPATH_ERROR_THRESH.
                if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_EXPECTED_EMPATH_ERROR)
                    &&
                    (L_expected_emp_err_cnt < OCC_EMPATH_ERROR_THRESH))
                {
                    L_expected_emp_err_cnt++;
                    TRAC_IMP("Expected empath collection error occurred %d time(s)! Core = %d",
                             L_expected_emp_err_cnt,
                             l_bulk_core_data_ptr->current_core);
                    // NOTE(review): both words are uint32_t but the format
                    // only pads each to 4 hex digits; if TRAC_* follows
                    // printf semantics the concatenated value is ambiguous
                    // when the low word has leading zeros -- consider %08x.
                    TRAC_IMP("OHA status register: 0x%4.4x%4.4x",
                             l_oha_reg_high, l_oha_reg_low);
                }

                if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_UNEXPECTED_EMPATH_ERROR)
                    &&
                    (L_unexpected_emp_err_cnt < OCC_EMPATH_ERROR_THRESH))
                {
                    L_unexpected_emp_err_cnt++;
                    TRAC_ERR("Unexpected empath collection error occurred %d time(s)! Core = %d",
                             L_unexpected_emp_err_cnt,
                             l_bulk_core_data_ptr->current_core);
                    TRAC_ERR("OHA status register: 0x%4.4x%4.4x",
                             l_oha_reg_high, l_oha_reg_low);

                    // Create and commit an informational error the first
                    // time this occurs.
                    if (L_unexpected_emp_err_cnt == 1)
                    {
                        TRAC_IMP("Logging unexpected empath collection error 1 time only.");
                        /*
                        * @errortype
                        * @moduleid    PROC_TASK_CORE_DATA_MOD
                        * @reasoncode  INTERNAL_HW_FAILURE
                        * @userdata1   OHA status reg high
                        * @userdata2   OHA status reg low
                        * @userdata4   ERC_PROC_CORE_DATA_EMPATH_ERROR
                        * @devdesc     An unexpected error occurred while
                        *              collecting core empath data.
                        */
                        l_err = createErrl(
                                PROC_TASK_CORE_DATA_MOD, //modId
                                INTERNAL_HW_FAILURE,     //reason code
                                ERC_PROC_CORE_DATA_EMPATH_ERROR, //Extended reason code
                                ERRL_SEV_INFORMATIONAL,  //Severity
                                NULL,                    //Trace
                                DEFAULT_TRACE_SIZE,      //Trace Size
                                l_oha_reg_high,          //userdata1
                                l_oha_reg_low);          //userdata2

                        commitErrl(&l_err);
                    }
                }
            }
        }
예제 #24
0
void amec_update_fw_sensors(void)
{
    errlHndl_t l_err                = NULL;
    int rc                          = 0;
    int rc2                         = 0;
    static bool l_first_call        = TRUE;
    bool l_gpe0_idle, l_gpe1_idle;
    static int L_consec_trace_count = 0;

    // ------------------------------------------------------
    // Update OCC Firmware Sensors from last tick
    // ------------------------------------------------------
    int l_last_state = G_fw_timing.amess_state;
    // RTLtickdur    = duration of last tick's RTL ISR (max = 250us)
    sensor_update( AMECSENSOR_PTR(RTLtickdur), G_fw_timing.rtl_dur);
    // AMEintdur     = duration of last tick's AMEC portion of RTL ISR
    sensor_update( AMECSENSOR_PTR(AMEintdur), G_fw_timing.ameint_dur);
    // AMESSdurX     = duration of last tick's AMEC state
    if(l_last_state >= NUM_AMEC_SMH_STATES)
    {
        // Sanity check.  Trace this out, even though it should never happen.
        TRAC_INFO("AMEC State Invalid, Sensor Not Updated");
    }
    else
    {
        // AMESSdurX = duration of last tick's AMEC state
        sensor_update( AMECSENSOR_ARRAY_PTR(AMESSdur0, l_last_state),  G_fw_timing.amess_dur);
    }

    // ------------------------------------------------------
    // Kick off GPE programs to track WorstCase time in GPE
    // and update the sensors.
    // ------------------------------------------------------
    if( (NULL != G_fw_timing.gpe0_timing_request)
        && (NULL != G_fw_timing.gpe1_timing_request) )
    {
        //Check if both GPE engines were able to complete the last GPE job on
        //the queue within 1 tick.
        l_gpe0_idle = async_request_is_idle(&G_fw_timing.gpe0_timing_request->request);
        l_gpe1_idle = async_request_is_idle(&G_fw_timing.gpe1_timing_request->request);
        if(l_gpe0_idle && l_gpe1_idle)
        {
            //reset the consecutive trace count
            L_consec_trace_count = 0;

            //Both GPE engines finished on time. Now check if they were
            //successful too.
            if( async_request_completed(&(G_fw_timing.gpe0_timing_request->request))
                && async_request_completed(&(G_fw_timing.gpe1_timing_request->request)) )
            {
                // GPEtickdur0 = duration of last tick's PORE-GPE0 duration
                sensor_update( AMECSENSOR_PTR(GPEtickdur0), G_fw_timing.gpe_dur[0]);
                // GPEtickdur1 = duration of last tick's PORE-GPE1 duration
                sensor_update( AMECSENSOR_PTR(GPEtickdur1), G_fw_timing.gpe_dur[1]);
            }
            else
            {
                //This case is expected on the first call of the function.
                //After that, this should not happen.
                if(!l_first_call)
                {
                    //Note: FFDC for this case is gathered by each task
                    //responsible for a GPE job.
                    TRAC_INFO("GPE task idle but GPE task did not complete");
                }
                l_first_call = FALSE;
            }

            // Update Time used to measure GPE duration.
            G_fw_timing.rtl_start_gpe = G_fw_timing.rtl_start;

            // Schedule the GPE Routines that will run and update the worst
            // case timings (via callback) after they complete.  These GPE
            // routines are the last GPE routines added to the queue
            // during the RTL tick.
            rc  = pore_flex_schedule(G_fw_timing.gpe0_timing_request);
            rc2 = pore_flex_schedule(G_fw_timing.gpe1_timing_request);

            if(rc || rc2)
            {
                /* @
                 * @errortype
                 * @moduleid    AMEC_UPDATE_FW_SENSORS
                 * @reasoncode  SSX_GENERIC_FAILURE
                 * @userdata1   return code - gpe0
                 * @userdata2   return code - gpe1
                 * @userdata4   OCC_NO_EXTENDED_RC
                 * @devdesc     Failure to schedule PORE-GPE poreFlex object for FW timing
                 *              analysis.
                 */
                l_err = createErrl(
                    AMEC_UPDATE_FW_SENSORS,             //modId
                    SSX_GENERIC_FAILURE,                //reasoncode
                    OCC_NO_EXTENDED_RC,                 //Extended reason code
                    ERRL_SEV_INFORMATIONAL,             //Severity
                    NULL,                               //Trace Buf
                    DEFAULT_TRACE_SIZE,                 //Trace Size
                    rc,                                 //userdata1
                    rc2);                               //userdata2

                // commit error log
                commitErrl( &l_err );
            }
        }
        else if(L_consec_trace_count < MAX_CONSEC_TRACE)
        {
            uint64_t l_dbg1;

            // Reset will eventually be requested due to not having power measurement
            // data after X ticks, but add some additional FFDC to the trace that
            // will tell us what GPE job is currently executing.
            if(!l_gpe0_idle)
            {
                l_dbg1 = in64(PORE_GPE0_DBG1);
                TRAC_ERR("GPE0 programs did not complete within one tick. DBG1[0x%08x%08x]",
                          l_dbg1 >> 32,
                          l_dbg1 & 0x00000000ffffffffull);
            }