// Function Specification // // Name: apssInitApplet // // Description: Entry point function // // End Function Specification errlHndl_t apssInitApplet(void * i_arg) { errlHndl_t l_err = NULL; // Initialize APSS l_err = apss_initialize(); if(NULL != l_err) { TRAC_ERR("APSS Init failed! (retrying) ErrLog[%p]", l_err); setErrlSevToInfo(l_err); // commit & delete commitErrl(&l_err); // Retry one more time l_err = apss_initialize(); if(NULL != l_err) { TRAC_ERR("APSS Init failed again! ErrLog[%p]",l_err); } } return l_err; }
// Initialize the memory task data void memory_init() { if(G_mem_monitoring_allowed) { // Check if memory task is running (default task is for NIMBUS) const task_id_t mem_task = TASK_ID_DIMM_SM; if(!rtl_task_is_runnable(mem_task)) { if (MEM_TYPE_NIMBUS == G_sysConfigData.mem_type) { // Init DIMM state manager IPC request memory_nimbus_init(); } else { // TODO CUMULUS NOT SUPPORTED YET IN PHASE1 #if 0 TRAC_INFO("memory_init: calling centaur_init()"); centaur_init(); //no rc, handles errors internally #endif TRAC_ERR("memory_init: invalid memory type 0x%02X", G_sysConfigData.mem_type); /* * @errortype * @moduleid DIMM_MID_MEMORY_INIT * @reasoncode MEMORY_INIT_FAILED * @userdata1 memory type * @userdata2 0 * @devdesc Invalid memory type detected */ errlHndl_t err = createErrl(DIMM_MID_MEMORY_INIT, MEMORY_INIT_FAILED, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, NULL, DEFAULT_TRACE_SIZE, G_sysConfigData.mem_type, 0); REQUEST_RESET(err); } // check if the init resulted in a reset if(isSafeStateRequested()) { TRAC_ERR("memory_init: OCC is being reset, memory init failed (type=0x%02X)", G_sysConfigData.mem_type); } else { // Initialization was successful. Set task flags to allow memory // tasks to run and also prevent from doing initialization again. G_task_table[mem_task].flags = MEMORY_DATA_RTL_FLAGS; //G_task_table[TASK_ID_CENTAUR_CONTROL].flags = MEMORY_CONTROL_RTL_FLAGS; } } } } // end memory_init()
//*************************************************************************
// Entry point function
//*************************************************************************
errlHndl_t traceTest(void * i_arg)
{
    errlHndl_t l_err = NULL;
    UINT l_rc = 0;

    do
    {
        // function unit test
        l_rc = traceFuncTest();
        if(l_rc)
        {
            printf("traceTest Applet: Function test failed\n");
            break;
        }

        // Macro test: test basic trace macros with/without parameters
        // int: supported
        TRAC_INFO(para_int_0);
        TRAC_INFO(para_int_1, 1);
        TRAC_INFO(para_int_5, 1, 2, 3, 4, 5);
        TRAC_INFO(para_int_6, 1, 2, 3, 4, 5, 6);

        // hex: supported
        TRAC_ERR(para_hex_0);
        TRAC_ERR(para_hex_1, 0xA);
        TRAC_ERR(para_hex_5, 0xA, 0xB, 0xC, 0xD, 0xE);
        TRAC_ERR(para_hex_6, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF);

        // char: not supported
        TRAC_IMP(para_chr_0);
        TRAC_IMP(para_chr_1, "1");
        TRAC_IMP(para_chr_5, "1", "2", "3", "4", "5");
        TRAC_IMP(para_chr_6, "1", "2", "3", "4", "5", "6");

#ifdef TEST_SEMAPHORE
        // semaphore test
        l_rc = traceSemTest();
        if(l_rc)
        {
            printf("traceTest Applet: Semaphore test failed\n");
            break;
        }
#endif
    } while(0);

    printf("traceTest Applet: test finished\n");

    return l_err;
}
// Function Specification // // Name: cmdh_mnfg_test_parse // // Description: This function parses the manufacturing commands sent via TMGT. // // End Function Specification errlHndl_t cmdh_mnfg_test_parse (const cmdh_fsp_cmd_t * i_cmd_ptr, cmdh_fsp_rsp_t * o_rsp_ptr) { uint8_t l_rc = 0; uint8_t l_sub_cmd = 0; errlHndl_t l_errl = NULL; // Sub-command is always first byte of data l_sub_cmd = i_cmd_ptr->data[0]; TRAC_INFO("cmdh_mnfg_test_parse: Mnfg sub-command [0x%02x]", l_sub_cmd); switch (l_sub_cmd) { case MNFG_RUN_STOP_SLEW: l_rc = cmdh_mnfg_run_stop_slew(i_cmd_ptr, o_rsp_ptr); break; case MNFG_OVERSUB_EMULATION: l_rc = cmdh_mnfg_emulate_oversub(i_cmd_ptr, o_rsp_ptr); break; case MNFG_LIST_SENSORS: l_rc = cmdh_mnfg_list_sensors(i_cmd_ptr, o_rsp_ptr); break; case MNFG_GET_SENSOR: l_rc = cmdh_mnfg_get_sensor(i_cmd_ptr, o_rsp_ptr); break; case MNFG_MEMORY_SLEW: l_rc = cmdh_mnfg_mem_slew(i_cmd_ptr, o_rsp_ptr); break; case MNFG_QUAD_PSTATE: l_rc = cmdh_mnfg_request_quad_pstate(i_cmd_ptr, o_rsp_ptr); break; case MNFG_READ_PSTATE_TABLE: l_rc = cmdh_mnfg_read_pstate_table(i_cmd_ptr, o_rsp_ptr); break; default: // Should never get here... l_rc = ERRL_RC_INVALID_DATA; break; } // All errors in MNFG logged internally if (l_rc) { TRAC_ERR("Mfg command 0x%02x failed with rc = %d", l_sub_cmd, l_rc); // Build Error Response packet cmdh_build_errl_rsp(i_cmd_ptr, o_rsp_ptr, l_rc, &l_errl); } return l_errl; }
// Function Specification // // Name: cmdh_mnfg_emulate_oversub // // Description: This function handles the manufacturing command to emulate // oversubscription. // // End Function Specification uint8_t cmdh_mnfg_emulate_oversub(const cmdh_fsp_cmd_t * i_cmd_ptr, cmdh_fsp_rsp_t * o_rsp_ptr) { uint8_t l_rc = 0; mnfg_emul_oversub_cmd_t *l_cmd_ptr = (mnfg_emul_oversub_cmd_t*) i_cmd_ptr; mnfg_emul_oversub_rsp_t *l_rsp_ptr = (mnfg_emul_oversub_rsp_t*) o_rsp_ptr; do { // This command is only supported on Master OCC if (G_occ_role == OCC_SLAVE) { TRAC_ERR("cmdh_mnfg_emulate_oversub: Mnfg command not supported on Slave OCCs!"); break; } switch (l_cmd_ptr->action) { case 0x00: TRAC_INFO("cmdh_mnfg_emulate_oversub: Disable oversubscription emulation"); AMEC_INTF_GET_OVERSUBSCRIPTION_EMULATION() = 0; l_rsp_ptr->state = l_cmd_ptr->action; break; case 0x01: TRAC_INFO("cmdh_mnfg_emulate_oversub: Enable oversubscription emulation"); AMEC_INTF_GET_OVERSUBSCRIPTION_EMULATION() = 1; l_rsp_ptr->state = l_cmd_ptr->action; break; case 0xFF: TRAC_INFO("cmdh_mnfg_emulate_oversub: Query oversubscription emulation"); l_rsp_ptr->state = AMEC_INTF_GET_OVERSUBSCRIPTION_EMULATION(); break; default: TRAC_INFO("cmdh_mnfg_emulate_oversub: Invalid oversubscription emulation action"); l_rsp_ptr->state = AMEC_INTF_GET_OVERSUBSCRIPTION_EMULATION(); break; } }while(0); // Populate the response data packet G_rsp_status = ERRL_RC_SUCCESS; l_rsp_ptr->data_length[0] = 0; l_rsp_ptr->data_length[1] = 1; return l_rc; }
// Function Specification // // Name: reset_wof_clear_inhibit // // Description: This function clears the inhibit bits that are // set as part of the WOF function // // End Function Specification void reset_wof_clear_inhibit() { uint64_t l_data64 = 0; uint32_t l_rc = 0; // Do not inhibit core wakeup anymore l_data64 = 0x0000000000000000ull; l_rc = _putscom(PDEMR, l_data64, SCOM_TIMEOUT); if (l_rc != 0) { TRAC_ERR("reset_wof_clear_inhibit: Error writing to PDEMR register! addr[0x%08X] rc[0x%08X]", PDEMR, l_rc); } else { TRAC_IMP("reset_wof_clear_inhibit: PDEMR register has been successfully cleared"); } }
/* * Function Specification * * Name: workaround_HW258436 * * Description: Sets up the PBA so that there is no overlap in use of buffers between * GPE engines and other engines. This came from Bishop Brock. * It should be pulled out after the procedure that sets up the PBA * has been fixed. Without this workaround we see an invalid instruction * failure on the GPE. * * End Function Specification */ void workaround_HW258436() { uint64_t l_scom_data = 0; int l_rc = 0; do { //scom errors will be committed internally -- gm033 l_rc = getscom_ffdc(0x64004, &l_scom_data, NULL); if(l_rc) break; l_scom_data &= 0xfffff1ffffffffffull; l_scom_data |= 0x0000080000000000ull; l_rc = putscom_ffdc(0x64004, l_scom_data, NULL); if(l_rc) break; l_rc = getscom_ffdc(0x64005, &l_scom_data, NULL); if(l_rc) break; l_scom_data &= 0xfffff1ffffffffffull; l_scom_data |= 0x0000040000000000ull; l_rc = putscom_ffdc(0x64005, l_scom_data, NULL); if(l_rc) break; l_rc = getscom_ffdc(0x64006, &l_scom_data, NULL); if(l_rc) break; l_scom_data &= 0xfffff1ffffffffffull; l_scom_data |= 0x0000040000000000ull; l_rc = putscom_ffdc(0x64006, l_scom_data, NULL); if(l_rc) break; l_rc = getscom_ffdc(0x64007, &l_scom_data, NULL); if(l_rc) break; l_scom_data &= 0xfffff1ffffffffffull; l_scom_data |= 0x0000040000000000ull; l_rc = putscom_ffdc(0x64007, l_scom_data, NULL); if(l_rc) break; }while(0); if(l_rc) { TRAC_ERR("workaround_HW258436: scom failure. rc=0x%08x", l_rc); } }
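// ----------------------------------------------------------------------
// Sketch (not part of the original source): the four get-modify-put
// sequences in workaround_HW258436() differ only in the SCOM address and
// the OR mask, so the same behavior could be expressed as a table walk.
// The pba_ovr_t type and L_pba_ovr table below are illustrative
// assumptions, not existing definitions; the addresses, masks, and
// getscom_ffdc/putscom_ffdc calls are taken from the function above.
// ----------------------------------------------------------------------
typedef struct
{
    uint32_t addr;     // PBA slave SCOM address
    uint64_t or_mask;  // bits to set after masking with 0xfffff1ffffffffffull
} pba_ovr_t;

static const pba_ovr_t L_pba_ovr[] =
{
    { 0x64004, 0x0000080000000000ull },
    { 0x64005, 0x0000040000000000ull },
    { 0x64006, 0x0000040000000000ull },
    { 0x64007, 0x0000040000000000ull },
};

void workaround_HW258436_sketch()
{
    uint64_t l_scom_data = 0;
    int l_rc = 0;
    int i;

    for (i = 0; (i < 4) && !l_rc; i++)
    {
        l_rc = getscom_ffdc(L_pba_ovr[i].addr, &l_scom_data, NULL);
        if (!l_rc)
        {
            l_scom_data &= 0xfffff1ffffffffffull;
            l_scom_data |= L_pba_ovr[i].or_mask;
            l_rc = putscom_ffdc(L_pba_ovr[i].addr, l_scom_data, NULL);
        }
    }

    if (l_rc)
    {
        TRAC_ERR("workaround_HW258436: scom failure. rc=0x%08x", l_rc);
    }
}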
/** * @brief Poll for completion of a FSI operation, return data on read */ int32_t poll_for_complete( uint32_t * o_val ) { int32_t rc = SUCCESS; enum { MAX_OPB_TIMEOUT_NS = 10*NS_PER_MSEC }; /*=10ms */ *o_val = 0; uint64_t read_data = 0; uint64_t elapsed_time_ns = 0; do { rc = xscom_read( OPB_REG_STAT, &read_data ); if ( SUCCESS != rc ) { fsi_recovery(); /* Try to recover the engine. */ return rc; } /* Check for completion. Note: not checking for FSI errors. */ if ( (read_data & OPB_STAT_BUSY) == 0 ) break; /* Not busy */ sleep( 10000 ); /* sleep for 10,000 ns */ elapsed_time_ns += 10000; } while ( elapsed_time_ns <= MAX_OPB_TIMEOUT_NS ); if ( MAX_OPB_TIMEOUT_NS < elapsed_time_ns ) { TRAC_ERR( "[poll_for_complete] FSI request timed out." ); return FAIL; } *o_val = (uint32_t)read_data; /* Data in the bottom half. */ return rc; }
// Function Specification // // Name: amec_slave_init // // Description: Perform initialization of any/all AMEC Slave Functions // // End Function Specification void amec_slave_init() { errlHndl_t l_err = NULL; // Error handler int rc = 0; // Return code int rc2 = 0; // Return code // Set the GPE Request Pointers to NULL in case the create fails. G_fw_timing.gpe0_timing_request = NULL; G_fw_timing.gpe1_timing_request = NULL; // Initializes the GPE routine that will be used to measure the worst case // timings for GPE0 rc = pore_flex_create( &G_gpe_nop_request[0], //gpe_req for the task &G_pore_gpe0_queue, //queue (void *) GPE_pore_nop, //entry point (uint32_t) NULL, //parm for the task SSX_WAIT_FOREVER, //no timeout (AsyncRequestCallback) amec_slv_update_gpe_sensors, //callback (void *) GPE_ENGINE_0, //callback argument ASYNC_CALLBACK_IMMEDIATE ); //options // Initializes the GPE routine that will be used to measure the worst case // timings for GPE1 rc2 = pore_flex_create( &G_gpe_nop_request[1], //gpe_req for the task &G_pore_gpe1_queue, //queue (void *)GPE_pore_nop, //entry point (uint32_t) NULL, //parm for the task SSX_WAIT_FOREVER, //no timeout (AsyncRequestCallback) amec_slv_update_gpe_sensors, //callback (void *) GPE_ENGINE_1, //callback argument ASYNC_CALLBACK_IMMEDIATE ); //options // If we couldn't create the poreFlex objects, there must be a major problem // so we will log an error and halt OCC. if( rc || rc2 ) { //If fail to create pore flex object then there is a problem. TRAC_ERR("Failed to create GPE duration poreFlex object[0x%x, 0x%x]", rc, rc2 ); /* @ * @errortype * @moduleid AMEC_INITIALIZE_FW_SENSORS * @reasoncode SSX_GENERIC_FAILURE * @userdata1 return code - gpe0 * @userdata2 return code - gpe1 * @userdata4 OCC_NO_EXTENDED_RC * @devdesc Failure to create PORE-GPE poreFlex object for FW timing * analysis. * */ l_err = createErrl( AMEC_INITIALIZE_FW_SENSORS, //modId SSX_GENERIC_FAILURE, //reasoncode OCC_NO_EXTENDED_RC, //Extended reason code ERRL_SEV_PREDICTIVE, //Severity NULL, //TODO: create trace //Trace Buf DEFAULT_TRACE_SIZE, //Trace Size rc, //userdata1 rc2 //userdata2 ); REQUEST_RESET(l_err); } else { // Everything was successful, so set FW timing pointers to these // GPE Request objects G_fw_timing.gpe0_timing_request = &G_gpe_nop_request[0]; G_fw_timing.gpe1_timing_request = &G_gpe_nop_request[1]; } // Initialize Vector Sensors for AMEC use amec_init_vector_sensors(); // Initialize AMEC internal parameters amec_init_gamec_struct(); }
// Function Specification // // Name: cmdh_mnfg_read_pstate_table // // Description: This function handles the manufacturing command to read // the generated Pstate table from main memory 3K blocks at a time // // End Function Specification uint8_t cmdh_mnfg_read_pstate_table(const cmdh_fsp_cmd_t * i_cmd_ptr, cmdh_fsp_rsp_t * o_rsp_ptr) { uint8_t l_rc = ERRL_RC_SUCCESS; uint16_t l_datalength = 0; uint16_t l_resp_data_length = 0; uint32_t block_offset = 0; uint32_t main_mem_address = 0; int l_ssxrc = SSX_OK; mnfg_read_pstate_table_cmd_t *l_cmd_ptr = (mnfg_read_pstate_table_cmd_t*) i_cmd_ptr; do { // Check command packet data length l_datalength = CMDH_DATALEN_FIELD_UINT16(i_cmd_ptr); if(l_datalength != (sizeof(mnfg_read_pstate_table_cmd_t) - sizeof(cmdh_fsp_cmd_header_t))) { TRAC_ERR("cmdh_mnfg_read_pstate_table: incorrect data length. exp[%d] act[%d]", (sizeof(mnfg_read_pstate_table_cmd_t) - sizeof(cmdh_fsp_cmd_header_t)), l_datalength); l_rc = ERRL_RC_INVALID_CMD_LEN; break; } // Process request if(l_cmd_ptr->request == MFG_PSTATE_READ_REQUEST_QUERY) { memcpy(&o_rsp_ptr->data[0], &G_pgpe_header.generated_pstate_table_homer_offset, 4); memcpy(&o_rsp_ptr->data[4], &G_pgpe_header.generated_pstate_table_length, 4); l_resp_data_length = MFG_PSTATE_READ_QUERY_RSP_SIZE; TRAC_INFO("cmdh_mnfg_read_pstate_table: Query table memory offset[0x%08x] table length[%d]", G_pgpe_header.generated_pstate_table_homer_offset, G_pgpe_header.generated_pstate_table_length); break; } // Calculate the starting main memory address for block to read block_offset = MFG_PSTATE_READ_MAX_RSP_SIZE * l_cmd_ptr->request; if(block_offset > G_pgpe_header.generated_pstate_table_length) { TRAC_ERR("cmdh_mnfg_read_pstate_table: Block request %d out of range. Pstate Table size %d", l_cmd_ptr->request, G_pgpe_header.generated_pstate_table_length); l_rc = ERRL_RC_INVALID_DATA; break; } main_mem_address = G_pgpe_header.generated_pstate_table_homer_offset + block_offset; // Copy Pstate table from main memory to SRAM // Set up a copy request l_ssxrc = bce_request_create(&G_mfg_pba_request, // block copy object &G_pba_bcde_queue, // mainstore to sram copy engine main_mem_address, // mainstore address (uint32_t)&G_mfg_read_pstate_table, // sram starting address sizeof(mfg_read_pstate_table_t), // size of copy SSX_SECONDS(1), // timeout NULL, // no call back NULL, // no call back arguments ASYNC_REQUEST_BLOCKING); // blocking request if(l_ssxrc != SSX_OK) { TRAC_ERR("cmdh_mnfg_read_pstate_table: BCDE request create failure rc=[%08X]", -l_ssxrc); l_rc = ERRL_RC_INTERNAL_FAIL; break; } // Do actual copying l_ssxrc = bce_request_schedule(&G_mfg_pba_request); if(l_ssxrc != SSX_OK) { TRAC_ERR("cmdh_mnfg_read_pstate_table: BCE request schedule failure rc=[%08X]", -l_ssxrc); l_rc = ERRL_RC_INTERNAL_FAIL; break; } // Determine the rsp data length l_resp_data_length = MFG_PSTATE_READ_MAX_RSP_SIZE; if((block_offset + MFG_PSTATE_READ_MAX_RSP_SIZE) > G_pgpe_header.generated_pstate_table_length) { l_resp_data_length = G_pgpe_header.generated_pstate_table_length - block_offset; } // Copy to response buffer memcpy(o_rsp_ptr->data, &G_mfg_read_pstate_table, l_resp_data_length); TRAC_INFO("cmdh_mnfg_read_pstate_table: Read from main memory[0x%08x] block offset[%d] length[%d]", main_mem_address, block_offset, l_resp_data_length); }while(0); // Populate the response data header G_rsp_status = l_rc; o_rsp_ptr->data_length[0] = ((uint8_t *)&l_resp_data_length)[0]; o_rsp_ptr->data_length[1] = ((uint8_t *)&l_resp_data_length)[1]; return l_rc; }
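// ----------------------------------------------------------------------
// Sketch (not part of the original source): a caller of
// MNFG_READ_PSTATE_TABLE first sends the QUERY request, which returns the
// HOMER offset in response bytes 0..3 and the table length in bytes 4..7,
// then reads the table one MFG_PSTATE_READ_MAX_RSP_SIZE block per request.
// The caller-side function below is hypothetical.
// ----------------------------------------------------------------------
void read_pstate_table_sketch(uint32_t i_table_length)
{
    // Number of blocks needed to cover i_table_length bytes
    uint32_t l_num_blocks = (i_table_length + MFG_PSTATE_READ_MAX_RSP_SIZE - 1)
                            / MFG_PSTATE_READ_MAX_RSP_SIZE;
    uint32_t l_block;

    for (l_block = 0; l_block < l_num_blocks; l_block++)
    {
        // Send MNFG_READ_PSTATE_TABLE with request = l_block.
        // Every block returns MFG_PSTATE_READ_MAX_RSP_SIZE bytes except the
        // final one, which returns (i_table_length - block_offset) bytes.
    }
}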
// Function Specification // // Name: cmdh_mnfg_request_quad_pstate // // Description: This function handles the manufacturing command to request // a Pstate per Quad. // // End Function Specification uint8_t cmdh_mnfg_request_quad_pstate(const cmdh_fsp_cmd_t * i_cmd_ptr, cmdh_fsp_rsp_t * o_rsp_ptr) { uint8_t l_rc = ERRL_RC_SUCCESS; uint16_t l_datalength = 0; uint16_t l_resp_data_length = 0; uint8_t l_pmin = 0xFF; uint8_t l_pmax = 0xFF; uint8_t l_pstate_request = 0xFF; uint8_t l_quad = 0; mnfg_quad_pstate_cmd_t *l_cmd_ptr = (mnfg_quad_pstate_cmd_t*) i_cmd_ptr; mnfg_quad_pstate_rsp_t *l_rsp_ptr = (mnfg_quad_pstate_rsp_t*) o_rsp_ptr; do { if(!IS_OCC_STATE_ACTIVE()) { TRAC_ERR("cmdh_mnfg_request_quad_pstate: OCC must be active to request pstate"); l_rc = ERRL_RC_INVALID_STATE; break; } if(G_sysConfigData.system_type.kvm) { TRAC_ERR("cmdh_mnfg_request_quad_pstate: Must be PowerVM to request pstate"); l_rc = ERRL_RC_INVALID_CMD; break; } // Check command packet data length l_datalength = CMDH_DATALEN_FIELD_UINT16(i_cmd_ptr); if(l_datalength != (sizeof(mnfg_quad_pstate_cmd_t) - sizeof(cmdh_fsp_cmd_header_t))) { TRAC_ERR("cmdh_mnfg_request_quad_pstate: incorrect data length. exp[%d] act[%d]", (sizeof(mnfg_quad_pstate_cmd_t) - sizeof(cmdh_fsp_cmd_header_t)), l_datalength); l_rc = ERRL_RC_INVALID_CMD_LEN; break; } // Check version if(l_cmd_ptr->version != MFG_QUAD_PSTATE_VERSION) { TRAC_ERR("cmdh_mnfg_request_quad_pstate: incorrect version. exp[%d] act[%d]", MFG_QUAD_PSTATE_VERSION, l_cmd_ptr->version); l_rc = ERRL_RC_INVALID_DATA; break; } // only allow a Pstate within the current range based on mode l_pmin = proc_freq2pstate(g_amec->sys.fmin); l_pmax = proc_freq2pstate(g_amec->sys.fmax); // Process each quad Pstate request, clip any request to min/max // 0xFF has special meaning that OCC is in control for(l_quad = 0; l_quad < MAXIMUM_QUADS; l_quad++) { l_pstate_request = l_cmd_ptr->quad_pstate_in[l_quad]; if(l_pstate_request != 0xFF) { // pmin is lowest frequency corresponding to highest pState value if(l_pstate_request > l_pmin) l_pstate_request = l_pmin; // pmax is highest frequency corresponding to lowest pState value else if(l_pstate_request < l_pmax) l_pstate_request = l_pmax; } // save the quad pState request for amec and return in rsp data g_amec->mnfg_parms.quad_pstate[l_quad] = l_pstate_request; l_rsp_ptr->quad_pstate_out[l_quad] = l_pstate_request; TRAC_INFO("cmdh_mnfg_request_quad_pstate: Quad %d Pstate in = 0x%02x Pstate out = 0x%02x", l_quad, l_cmd_ptr->quad_pstate_in[l_quad], l_rsp_ptr->quad_pstate_out[l_quad]); } }while(0); // Populate the response data header G_rsp_status = l_rc; l_resp_data_length = sizeof(mnfg_quad_pstate_rsp_t) - sizeof(cmdh_fsp_rsp_header_t); l_rsp_ptr->data_length[0] = ((uint8_t *)&l_resp_data_length)[0]; l_rsp_ptr->data_length[1] = ((uint8_t *)&l_resp_data_length)[1]; return l_rc; }
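// ----------------------------------------------------------------------
// Sketch (not part of the original source): Pstate values grow as
// frequency drops, so the clipping in cmdh_mnfg_request_quad_pstate()
// keeps each request between l_pmax (numerically smallest Pstate, highest
// frequency) and l_pmin (numerically largest Pstate, lowest frequency).
// Extracted as a hypothetical helper for illustration:
// ----------------------------------------------------------------------
uint8_t clip_quad_pstate_sketch(uint8_t i_request, uint8_t i_pmin, uint8_t i_pmax)
{
    if (i_request == 0xFF) return 0xFF;     // special value: OCC stays in control
    if (i_request > i_pmin) return i_pmin;  // below fmin: clip to fmin's Pstate
    if (i_request < i_pmax) return i_pmax;  // above fmax: clip to fmax's Pstate
    return i_request;                       // already within [fmin, fmax]
}
// Example: with i_pmin = 0x30 and i_pmax = 0x10, a request of 0x40 clips
// to 0x30, a request of 0x08 clips to 0x10, and 0x20 passes through.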
// Function Specification // // Name: dbug_err_inject // // Description: Injects an error // // End Function Specification void dbug_err_inject(const cmdh_fsp_cmd_t * i_cmd_ptr, cmdh_fsp_rsp_t * i_rsp_ptr) { errlHndl_t l_err; cmdh_dbug_inject_errl_query_t *l_cmd_ptr = (cmdh_dbug_inject_errl_query_t*) i_cmd_ptr; i_rsp_ptr->data_length[0] = 0; i_rsp_ptr->data_length[1] = 0; G_rsp_status = ERRL_RC_SUCCESS; if(!strncmp(l_cmd_ptr->comp, "RST", OCC_TRACE_NAME_SIZE)) { l_err = createErrl(CMDH_DBUG_MID, //modId INTERNAL_FAILURE, //reasoncode OCC_NO_EXTENDED_RC, //Extended reason code ERRL_SEV_PREDICTIVE, //Severity NULL, //Trace Buf DEFAULT_TRACE_SIZE, //Trace Size 0xff, //userdata1 0); //userdata2 if (INVALID_ERR_HNDL == l_err) { G_rsp_status = ERRL_RC_INTERNAL_FAIL; } addCalloutToErrl(l_err, ERRL_CALLOUT_TYPE_HUID, //callout type (HUID/CompID) G_sysConfigData.proc_huid, //callout data ERRL_CALLOUT_PRIORITY_HIGH); //priority REQUEST_RESET(l_err); } else { l_err = createErrl(CMDH_DBUG_MID, //modId INTERNAL_FAILURE, //reasoncode OCC_NO_EXTENDED_RC, //Extended reason code ERRL_SEV_UNRECOVERABLE, //Severity TRAC_get_td(l_cmd_ptr->comp), //Trace Buf DEFAULT_TRACE_SIZE, //Trace Size 0xff, //userdata1 0); //userdata2 if (INVALID_ERR_HNDL == l_err) { G_rsp_status = ERRL_RC_INTERNAL_FAIL; } // Commit Error log commitErrl(&l_err); } if (G_rsp_status == ERRL_RC_INTERNAL_FAIL) { TRAC_ERR("cmdh_dbug_inject_errl: Fail creating ERR Log\n"); } else { TRAC_INFO("cmdh_dbug_inject_errl: inject errl for COMP : %s\n", l_cmd_ptr->comp); } return; }
void task_core_data( task_t * i_task ) { errlHndl_t l_err = NULL; //Error handler tracDesc_t l_trace = NULL; //Temporary trace descriptor int rc = 0; //return code bulk_core_data_task_t * l_bulk_core_data_ptr = (bulk_core_data_task_t *)i_task->data_ptr; GpeGetCoreDataParms * l_parms = (GpeGetCoreDataParms *)(l_bulk_core_data_ptr->gpe_req.parameter); gpe_bulk_core_data_t * l_temp = NULL; do { //First, check to see if the previous GPE request still running //A request is considered idle if it is not attached to any of the //asynchronous request queues if( !(async_request_is_idle(&l_bulk_core_data_ptr->gpe_req.request)) ) { //This should not happen unless there's a problem //Trace 1 time if( !G_queue_not_idle_traced ) { TRAC_ERR("Core data GPE is still running \n"); G_queue_not_idle_traced = TRUE; } break; } //Need to complete collecting data for all assigned cores from previous interval //and tick 0 is the current tick before collect data again. if( (l_bulk_core_data_ptr->current_core == l_bulk_core_data_ptr->end_core) && ((CURRENT_TICK & (MAX_NUM_TICKS - 1)) != 0) ) { PROC_DBG("Not collect data. Need to wait for tick.\n"); break; } //Check to see if the previously GPE request has successfully completed //A request is not considered complete until both the engine job //has finished without error and any callback has run to completion. if( async_request_completed(&l_bulk_core_data_ptr->gpe_req.request) && CORE_PRESENT(l_bulk_core_data_ptr->current_core) ) { //If the previous GPE request succeeded then swap core_data_ptr //with the global one. The gpe routine will write new data into //a buffer that is not being accessed by the RTLoop code. PROC_DBG( "Swap core_data_ptr [%x] with the global one\n", l_bulk_core_data_ptr->current_core ); //debug only #ifdef PROC_DEBUG print_core_status(l_bulk_core_data_ptr->current_core); print_core_data_sensors(l_bulk_core_data_ptr->current_core); #endif l_temp = l_bulk_core_data_ptr->core_data_ptr; l_bulk_core_data_ptr->core_data_ptr = G_core_data_ptrs[l_bulk_core_data_ptr->current_core]; G_core_data_ptrs[l_bulk_core_data_ptr->current_core] = l_temp; //Core data has been collected so set the bit in global mask. //AMEC code will know which cores to update sensors for. AMEC is //responsible for clearing the bit later on. G_updated_core_mask |= CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core); // Presumptively clear the empath error mask G_empath_error_core_mask &= ~(CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core)); // The gpe_data collection code has to handle the workaround for // HW280375. Two new flags have been added to the OHA_RO_STATUS_REG // image to indicate whether the EMPATH collection failed, and // whether it was due to an "expected" error that we can ignore // (we can ignore the data as well), or an "unexpected" error that // we will create an informational log one time. // // The "expected" errors are very rare in practice, in fact we may // never even see them unless running a specific type of workload. // If you want to test the handling of expected errors compile the // GPE code with -DINJECT_HW280375_ERRORS which will inject an error // approximately every 1024 samples // // To determine if the expected error has occurred inspect the // CoreDataOha element of the CoreData structure written by the GPE // core data job. The OHA element contains the oha_ro_status_reg. // Inside the OHA status register is a 16 bit reserved field. 
// gpe_data.h defines two masks that can be applied against the // reserved field to check for these errors: // CORE_DATA_EXPECTED_EMPATH_ERROR // CORE_DATA_UNEXPECTED_EMPATH_ERROR // Also, a 4-bit PCB parity + error code is saved at bit position: // CORE_DATA_EMPATH_ERROR_LOCATION, formally the length is // specified by: CORE_DATA_EMPATH_ERROR_BITS gpe_bulk_core_data_t *l_core_data = G_core_data_ptrs[l_bulk_core_data_ptr->current_core]; // We will trace the errors, but only a certain number of // times, we will only log the unexpected error once. #define OCC_EMPATH_ERROR_THRESH 10 static uint32_t L_expected_emp_err_cnt = 0; static uint32_t L_unexpected_emp_err_cnt = 0; // Check the reserved field for the expected or the unexpected error flag if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_EXPECTED_EMPATH_ERROR) || (l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_UNEXPECTED_EMPATH_ERROR)) { // Indicate empath error on current core G_empath_error_core_mask |= CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core); // Save the high and low order words of the OHA status reg uint32_t l_oha_reg_high = l_core_data->oha.oha_ro_status_reg.words.high_order; uint32_t l_oha_reg_low = l_core_data->oha.oha_ro_status_reg.words.low_order; // Handle each error case if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_EXPECTED_EMPATH_ERROR) && (L_expected_emp_err_cnt < OCC_EMPATH_ERROR_THRESH)) { L_expected_emp_err_cnt++; TRAC_IMP("Expected empath collection error occurred %d time(s)! Core = %d", L_expected_emp_err_cnt, l_bulk_core_data_ptr->current_core); TRAC_IMP("OHA status register: 0x%4.4x%4.4x", l_oha_reg_high, l_oha_reg_low); } if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0 & CORE_DATA_UNEXPECTED_EMPATH_ERROR) && (L_unexpected_emp_err_cnt < OCC_EMPATH_ERROR_THRESH)) { L_unexpected_emp_err_cnt++; TRAC_ERR("Unexpected empath collection error occurred %d time(s)! Core = %d", L_unexpected_emp_err_cnt, l_bulk_core_data_ptr->current_core); TRAC_ERR("OHA status register: 0x%4.4x%4.4x", l_oha_reg_high, l_oha_reg_low); // Create and commit an informational error the first // time this occurs. if (L_unexpected_emp_err_cnt == 1) { TRAC_IMP("Logging unexpected empath collection error 1 time only."); /* * @errortype * @moduleid PROC_TASK_CORE_DATA_MOD * @reasoncode INTERNAL_HW_FAILURE * @userdata1 OHA status reg high * @userdata2 OHA status reg low * @userdata4 ERC_PROC_CORE_DATA_EMPATH_ERROR * @devdesc An unexpected error occurred while * collecting core empath data. */ l_err = createErrl( PROC_TASK_CORE_DATA_MOD, //modId INTERNAL_HW_FAILURE, //reason code ERC_PROC_CORE_DATA_EMPATH_ERROR, //Extended reason code ERRL_SEV_INFORMATIONAL, //Severity NULL, //Trace DEFAULT_TRACE_SIZE, //Trace Size l_oha_reg_high, //userdata1 l_oha_reg_low); //userdata2 commitErrl(&l_err); } } } }
errorHndl_t doMessage( astMbox_t *io_mbox, mboxMessage_t *io_msg, int i_arg_size )
{
    uint8_t* l_data = (uint8_t*)io_msg;
    errorHndl_t l_err = NO_ERROR;
    uint8_t l_stat1;
    uint32_t l_loops = 0;
    bool l_prot_error = false;
    int i;

    io_msg->iv_seq = io_mbox->iv_mboxMsgSeq++;

    // First try to send the message over IPMI
    l_err = ipmi_sendCommand(io_msg, i_arg_size);

    // If that didn't work then try to access the AST MBOX via LPC.
    // This is allowed for the case of an older BMC. Eventually it could
    // be removed.
    if(l_err)
    {
        do
        {
            /* Write message out */
            for (i = 0; i < BMC_MBOX_DATA_REGS && !l_err; i++)
            {
                l_err = mboxOut(i, l_data[i]);
            }

            if ( l_err )
            {
                break;
            }

            /* Clear status1 response bit as it was just set via reg write */
            l_err = mboxOut(MBOX_STATUS_1, MBOX_STATUS1_RESP);

            if ( l_err )
            {
                break;
            }

            /* Ping BMC */
            l_err = mboxOut(MBOX_HOST_CTRL, MBOX_CTRL_INT_SEND);

            if ( l_err )
            {
                break;
            }

            /* Wait for response */
            while ( l_loops++ < MBOX_MAX_RESP_WAIT_US && !l_err )
            {
                l_err = mboxIn(MBOX_STATUS_1, &l_stat1);

                if ( l_err )
                {
                    TRAC_ERR("doMessage error from MBOX_STATUS_1");
                    break;
                }

                if ( l_stat1 & MBOX_STATUS1_RESP )
                {
                    break;
                }

                busy_wait(1000);
            }

            if ( l_err )
            {
                TRAC_ERR( "Got error waiting for response !");
                break;
            }

            if ( !(l_stat1 & MBOX_STATUS1_RESP) )
            {
                TRAC_ERR( "Timeout waiting for response !");

                // Don't try to interrupt the BMC anymore
                l_err = mboxOut(MBOX_HOST_CTRL, 0);

                if ( l_err )
                {
                    // Note the command failed
                    TRAC_ERR( "Error communicating with MBOX daemon");
                    TRAC_ERR( "Mbox status 1 reg: %x", l_stat1);
                }

                // Tell the code below that we generated the error
                // (not an LPC error)
                l_prot_error = true;
                break;
            }

            /* Clear status */
            l_err = mboxOut(MBOX_STATUS_1, MBOX_STATUS1_RESP);

            if (l_err)
            {
                TRAC_ERR( "Got error clearing status");
                break;
            }

            // Remember some message fields before they get overwritten
            // by the response
            uint8_t old_seq = io_msg->iv_seq;

            // Read response
            for (i = 0; i < BMC_MBOX_DATA_REGS && !l_err; i++)
            {
                l_err = mboxIn(i, &l_data[i]);
            }

            if ( l_err )
            {
                TRAC_ERR( "Got error reading response !");
                break;
            }

            if (old_seq != io_msg->iv_seq)
            {
                TRAC_ERR( "bad sequence number in mbox message, got %d want %d",
                          io_msg->iv_seq, old_seq);
                l_err = -1;
                break;
            }

            if (io_msg->iv_resp != MBOX_R_SUCCESS)
            {
                TRAC_ERR( "BMC mbox command failed with err %d", io_msg->iv_resp);
                l_err = -1;

                // Tell code below that we generated the error (not an LPC error)
                l_prot_error = true;
                break;
            }
        } while(0);

        // If we got an LPC error, replace it with our own generic error code
        if ( l_err && !l_prot_error )
        {
            l_err = -1;
        }
    }

    return l_err;
}
// Function Specification
//
// Name: SMGR_set_mode
//
// Description: Set the OCC mode, running the matching mode transition
//              function from the mode transition table.
//
// End Function Specification
errlHndl_t SMGR_set_mode(const OCC_MODE i_mode, const uint8_t i_sms_type)
{
    errlHndl_t l_errlHndl = NULL;
    int jj = 0;
    OCC_MODE l_mode = i_mode;

    do
    {
        // Get lock for critical section
        if(ssx_semaphore_pend(&G_smgrModeChangeSem, SSX_WAIT_FOREVER))
        {
            /* @
             * @errortype
             * @moduleid    MAIN_MODE_TRANSITION_MID
             * @reasoncode  SSX_GENERIC_FAILURE
             * @userdata1   none
             * @userdata4   ERC_RUNNING_SEM_PENDING_FAILURE
             * @devdesc     SSX semaphore related failure
             */
            l_errlHndl = createErrl(MAIN_MODE_TRANSITION_MID,        //modId
                                    SSX_GENERIC_FAILURE,             //reasoncode
                                    ERC_RUNNING_SEM_PENDING_FAILURE, //Extended reason code
                                    ERRL_SEV_UNRECOVERABLE,          //Severity
                                    NULL,                            //Trace Buf
                                    DEFAULT_TRACE_SIZE,              //Trace Size
                                    0,                               //userdata1
                                    0);                              //userdata2

            // Callout firmware
            addCalloutToErrl(l_errlHndl,
                             ERRL_CALLOUT_TYPE_COMPONENT_ID,
                             ERRL_COMPONENT_ID_FIRMWARE,
                             ERRL_CALLOUT_PRIORITY_HIGH);
            break;
        }

        // Check to see if we need to make a change
        if(l_mode == OCC_MODE_NOCHANGE)
        {
            break;
        }

        // SAPPHIRE only accepts DPS-FE mode. In case OCC gets other modes, it
        // should accept the request and keep reporting back that it is in that
        // mode. However, internally we should not initiate any mode transition,
        // i.e., OCC should remain internally in DPS-FE mode.
        if(G_sysConfigData.system_type.kvm)
        {
            G_occ_external_req_mode_kvm = l_mode;
            if (l_mode != OCC_MODE_DYN_POWER_SAVE)
            {
                TRAC_ERR("SAPPHIRE only accepts DPS-FE mode(6) but requested mode is : %d", l_mode);
                l_mode = OCC_MODE_DYN_POWER_SAVE;
            }
        }

        switch (l_mode)
        {
            case OCC_MODE_NOMINAL:           // FALL THROUGH
            case OCC_MODE_PWRSAVE:           // FALL THROUGH
            case OCC_MODE_DYN_POWER_SAVE:    // FALL THROUGH
            case OCC_MODE_DYN_POWER_SAVE_FP: // FALL THROUGH
            case OCC_MODE_TURBO:             // FALL THROUGH
            case OCC_MODE_STURBO:            // FALL THROUGH
            case OCC_MODE_FFO:               // FALL THROUGH
                // Notify AMEC of mode change
                // Change Mode via Transition Function
                do
                {
                    // Loop through mode transition table, and find the state
                    // transition function that matches the transition we need to do.
                    for(jj = 0; jj < G_smgr_mode_trans_count; jj++)
                    {
                        if( ((G_smgr_mode_trans[jj].old_state == G_occ_internal_mode)
                             ||
                             (G_smgr_mode_trans[jj].old_state == OCC_MODE_ALL))
                            && (G_smgr_mode_trans[jj].new_state == l_mode) )
                        {
                            // We found the transition that matches, now run the
                            // function that is associated with that state transition.
                            if(NULL != G_smgr_mode_trans[jj].trans_func_ptr)
                            {
                                // Signal that we are now in a mode transition
                                G_mode_transition_occuring = TRUE;
                                // Run transition function
                                l_errlHndl = (G_smgr_mode_trans[jj].trans_func_ptr)();
                                // Signal that we are done with the transition
                                G_mode_transition_occuring = FALSE;
                                break;
                            }
                        }
                    }

                    // Check if we hit the end of the table without finding a valid
                    // mode transition. If we did, log an internal error.
                    if(G_smgr_mode_trans_count == jj)
                    {
                        TRAC_ERR("No transition (or NULL) found for the mode change\n");
                        l_errlHndl = NULL; //TODO: Create Error
                        break;
                    }

                    // Update the power mode for all core groups that are
                    // following system mode
                    AMEC_part_update_sysmode_policy(CURRENT_MODE());
                } while(0);
                break;

            default:
                // Unsupported mode
                break;
        }

        if(l_errlHndl)
        {
            // Punt !!! :-)
            break;
        }

        // Load correct thermal thresholds based on the current mode
        l_errlHndl = AMEC_data_write_thrm_thresholds(CURRENT_MODE());

        // Update the CPU speed in AME?
        // Register the New Mode?
        // Update Power Policy Requirements?
        // Update CPM Calibration

    } while(0);

    // If we have a mode change failure, the mode change flag needs to be set;
    // otherwise, it needs to be cleared/unset.
    if(l_errlHndl)
    {
        // TODO: set the mode change failure flag here (and clear it on success)
    }

    // Unlock critical section
    ssx_semaphore_post(&G_smgrModeChangeSem);

    return l_errlHndl;
}
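// ----------------------------------------------------------------------
// Sketch (not part of the original source): the dispatch loop in
// SMGR_set_mode() depends on three fields of each G_smgr_mode_trans
// entry: old_state, new_state and trans_func_ptr, with OCC_MODE_ALL
// acting as a wildcard for old_state. The entry type and table contents
// below are illustrative assumptions about how such a table could be
// laid out; the worker function names are hypothetical.
// ----------------------------------------------------------------------
typedef struct
{
    OCC_MODE     old_state;              // mode being left (or OCC_MODE_ALL)
    OCC_MODE     new_state;              // mode being entered
    errlHndl_t (*trans_func_ptr)(void);  // transition worker function
} smgr_mode_trans_sketch_t;

// Hypothetical workers; the real table and functions live in the SMGR code.
extern errlHndl_t example_trans_to_nominal(void);
extern errlHndl_t example_trans_to_powersave(void);

static const smgr_mode_trans_sketch_t G_example_mode_trans[] =
{
    { OCC_MODE_ALL, OCC_MODE_NOMINAL, example_trans_to_nominal   },
    { OCC_MODE_ALL, OCC_MODE_PWRSAVE, example_trans_to_powersave },
};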
// Function Specification // // Name: cmdh_mnfg_mem_slew // // Description: This function handles the manufacturing command to start // or stop memory autoslewing. // // End Function Specification uint8_t cmdh_mnfg_mem_slew(const cmdh_fsp_cmd_t * i_cmd_ptr, cmdh_fsp_rsp_t * o_rsp_ptr) { uint8_t l_rc = ERRL_RC_SUCCESS; mnfg_mem_slew_cmd_t *l_cmd_ptr = (mnfg_mem_slew_cmd_t*) i_cmd_ptr; mnfg_mem_slew_rsp_t *l_rsp_ptr = (mnfg_mem_slew_rsp_t*) o_rsp_ptr; do { // Do some basic input verification if (l_cmd_ptr->action > MNFG_INTF_SLEW_STOP) { // Invalid values were passed by the user! TRAC_ERR("cmdh_mnfg_mem_slew: Invalid value was detected! action[0x%02x]", l_cmd_ptr->action); l_rc = ERRL_RC_INVALID_DATA; break; } // Are we stopping the auto-slew function? if (l_cmd_ptr->action == MNFG_INTF_SLEW_STOP) { // Send a signal to RTL to stop auto-slewing g_amec->mnfg_parms.mem_autoslew = FALSE; // Collect the slew count if(g_amec->mnfg_parms.mem_slew_counter > 0x0000FFFF) { l_rsp_ptr->slew_count = 0xFFFF; } else { l_rsp_ptr->slew_count = g_amec->mnfg_parms.mem_slew_counter; } // Zero out the slew count; g_amec->mnfg_parms.mem_slew_counter = 0; TRAC_INFO("cmdh_mnfg_mem_slew: Auto-slewing has been stopped. Count[%u]", l_rsp_ptr->slew_count); // We are done break; } // If we made it here, that means we are starting up a slew run TRAC_INFO("cmdh_mnfg_mem_slew: We are about to start auto-slewing function"); // If the OCC is active (we can only run auto-slew in active state) the memory control // task must be running and there is no support (or need) to force activation of // memory monitoring and control if(!IS_OCC_STATE_ACTIVE()) { TRAC_ERR("cmdh_mnfg_mem_slew: OCC must be active to start mem slewing"); l_rc = ERRL_RC_INVALID_STATE; break; } if(!rtl_task_is_runnable(TASK_ID_MEMORY_CONTROL)) { TRAC_ERR("cmdh_mnfg_mem_slew: memory control task not running"); l_rc = ERRL_RC_INTERNAL_FAIL; break; } // Zero out the slew count g_amec->mnfg_parms.mem_slew_counter = 0; // Send a signal to RTL to start memory auto-slewing g_amec->mnfg_parms.mem_autoslew = TRUE; // We are auto-slewing now, populate the response packet l_rsp_ptr->slew_count = 0; TRAC_INFO("cmdh_mnfg_mem_slew: memory slewing started."); }while(0); // Populate the response data packet G_rsp_status = l_rc; l_rsp_ptr->data_length[0] = 0; l_rsp_ptr->data_length[1] = MNFG_INTF_MEM_SLEW_RSP_SIZE; return l_rc; }
// Function Specification
//
// Name: apss_initialize
//
// Description: Completes all APSS initialization including GPIOs, altitude and
//              mode
//
// End Function Specification
errlHndl_t apss_initialize()
{
    errlHndl_t l_err = NULL;
    PoreFlex request;

    // Setup the GPIO init structure to pass to the GPE program
    G_gpe_apss_initialize_gpio_args.error.error = 0;
    G_gpe_apss_initialize_gpio_args.error.ffdc = 0;
    G_gpe_apss_initialize_gpio_args.config0.direction = G_gpio_config[0].direction;
    G_gpe_apss_initialize_gpio_args.config0.drive     = G_gpio_config[0].drive;
    G_gpe_apss_initialize_gpio_args.config0.interrupt = G_gpio_config[0].interrupt;
    G_gpe_apss_initialize_gpio_args.config1.direction = G_gpio_config[1].direction;
    G_gpe_apss_initialize_gpio_args.config1.drive     = G_gpio_config[1].drive;
    G_gpe_apss_initialize_gpio_args.config1.interrupt = G_gpio_config[1].interrupt;

    // Create/schedule GPE_apss_initialize_gpio and wait for it to complete (BLOCKING)
    TRAC_INFO("Creating request for GPE_apss_initialize_gpio");
    pore_flex_create(&request,                                   // request
                     &G_pore_gpe0_queue,                         // queue
                     (void*)GPE_apss_initialize_gpio,            // GPE entry_point
                     (uint32_t)&G_gpe_apss_initialize_gpio_args, // GPE argument_ptr
                     SSX_SECONDS(5),                             // timeout
                     NULL,                                       // callback
                     NULL,                                       // callback arg
                     ASYNC_REQUEST_BLOCKING);                    // options

    // Schedule the request to be executed
    pore_flex_schedule(&request);

    // Check for a timeout; will create the error log later
    // NOTE: As of 2013/07/16, simics will still fail here on an OCC reset
    if(ASYNC_REQUEST_STATE_TIMED_OUT == request.request.completion_state)
    {
        // For whatever reason, we hit a timeout. It could be either
        // that the HW did not work, or the request didn't ever make
        // it to the front of the queue.
        // Let's log an error, and include the FFDC data if it was
        // generated.
        TRAC_ERR("Timeout communicating with PORE-GPE for APSS Init");
    }

    TRAC_INFO("GPE_apss_initialize_gpio completed w/rc=0x%08x\n",
              request.request.completion_state);

    // Only continue if completed without errors...
    if (ASYNC_REQUEST_STATE_COMPLETE == request.request.completion_state)
    {
        // Setup the composite mode structure to pass to the GPE program
        G_gpe_apss_set_composite_mode_args.error.error = 0;
        G_gpe_apss_set_composite_mode_args.error.ffdc = 0;
        G_gpe_apss_set_composite_mode_args.config.numAdcChannelsToRead =
            G_apss_composite_config.numAdcChannelsToRead;
        G_gpe_apss_set_composite_mode_args.config.numGpioPortsToRead =
            G_apss_composite_config.numGpioPortsToRead;

        // Create/schedule GPE_apss_set_composite_mode and wait for it to complete (BLOCKING)
        TRAC_INFO("Creating request for GPE_apss_set_composite_mode");
        pore_flex_create(&request,                                      // request
                         &G_pore_gpe0_queue,                            // queue
                         (void*)GPE_apss_set_composite_mode,            // GPE entry_point
                         (uint32_t)&G_gpe_apss_set_composite_mode_args, // GPE argument_ptr
                         SSX_SECONDS(5),                                // timeout
                         NULL,                                          // callback
                         NULL,                                          // callback arg
                         ASYNC_REQUEST_BLOCKING);                       // options
        pore_flex_schedule(&request);

        // Check for a timeout; will create the error log later
        if(ASYNC_REQUEST_STATE_TIMED_OUT == request.request.completion_state)
        {
            // For whatever reason, we hit a timeout. It could be either
            // that the HW did not work, or the request didn't ever make
            // it to the front of the queue.
            // Let's log an error, and include the FFDC data if it was
            // generated.
TRAC_ERR("Timeout communicating with PORE-GPE for APSS Init"); } TRAC_INFO("GPE_apss_set_composite_mode completed w/rc=0x%08x", request.request.completion_state); if (ASYNC_REQUEST_STATE_COMPLETE != request.request.completion_state) { /* * @errortype * @moduleid PSS_MID_APSS_INIT * @reasoncode INTERNAL_FAILURE * @userdata1 GPE returned rc code * @userdata2 GPE returned abort code * @userdata4 ERC_PSS_COMPOSITE_MODE_FAIL * @devdesc Failure from GPE for setting composite mode on * APSS */ l_err = createErrl(PSS_MID_APSS_INIT, // i_modId, INTERNAL_FAILURE, // i_reasonCode, ERC_PSS_COMPOSITE_MODE_FAIL, // extended reason code ERRL_SEV_UNRECOVERABLE, // i_severity NULL, // i_trace, 0x0000, // i_traceSz, request.request.completion_state, // i_userData1, request.request.abort_state); // i_userData2 addUsrDtlsToErrl(l_err, (uint8_t*)&G_gpe_apss_set_composite_mode_args, sizeof(G_gpe_apss_set_composite_mode_args), ERRL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA); // Returning an error log will cause us to go to safe // state so we can report error to FSP } TRAC_INFO("apss_initialize: Creating request G_meas_start_request."); //Create the request for measure start. Scheduling will happen in apss.c pore_flex_create(&G_meas_start_request, &G_pore_gpe0_queue, // queue (void*)GPE_apss_start_pwr_meas_read, // entry_point (uint32_t)&G_gpe_start_pwr_meas_read_args, // entry_point arg SSX_WAIT_FOREVER, // no timeout NULL, // callback NULL, // callback arg ASYNC_CALLBACK_IMMEDIATE); // options TRAC_INFO("apss_initialize: Creating request G_meas_cont_request."); //Create the request for measure continue. Scheduling will happen in apss.c pore_flex_create(&G_meas_cont_request, &G_pore_gpe0_queue, // request (void*)GPE_apss_continue_pwr_meas_read, // entry_point (uint32_t)&G_gpe_continue_pwr_meas_read_args, // entry_point arg SSX_WAIT_FOREVER, // no timeout NULL, // callback NULL, // callback arg ASYNC_CALLBACK_IMMEDIATE); // options TRAC_INFO("apss_initialize: Creating request G_meas_complete_request."); //Create the request for measure complete. Scheduling will happen in apss.c pore_flex_create(&G_meas_complete_request, &G_pore_gpe0_queue, // queue (void*)GPE_apss_complete_pwr_meas_read, // entry_point (uint32_t)&G_gpe_complete_pwr_meas_read_args,// entry_point arg SSX_WAIT_FOREVER, // no timeout (AsyncRequestCallback)reformat_meas_data, // callback, (void*)NULL, // callback arg ASYNC_CALLBACK_IMMEDIATE); // options } else { /* * @errortype * @moduleid PSS_MID_APSS_INIT * @reasoncode INTERNAL_FAILURE * @userdata1 GPE returned rc code * @userdata2 GPE returned abort code * @userdata4 ERC_PSS_GPIO_INIT_FAIL * @devdesc Failure from GPE for gpio initialization on APSS */ l_err = createErrl(PSS_MID_APSS_INIT, // i_modId, INTERNAL_FAILURE, // i_reasonCode, ERC_PSS_GPIO_INIT_FAIL, // extended reason code ERRL_SEV_UNRECOVERABLE, // i_severity NULL, // tracDesc_t i_trace, 0x0000, // i_traceSz, request.request.completion_state, // i_userData1, request.request.abort_state); // i_userData2 addUsrDtlsToErrl(l_err, (uint8_t*)&G_gpe_apss_initialize_gpio_args, sizeof(G_gpe_apss_initialize_gpio_args), ERRL_STRUCT_VERSION_1, ERRL_USR_DTL_TRACE_DATA); // Returning an error log will cause us to go to safe // state so we can report error to FSP } return l_err; }
//*************************************************************************
// Functions
//*************************************************************************
void amec_vectorize_core_sensor(sensor_t * l_sensor,
                                vectorSensor_t * l_vector,
                                const VECTOR_SENSOR_OP l_op,
                                uint16_t l_sensor_elem_array_gsid)
{
#define VECTOR_CREATE_FAILURE   1
#define VECTOR_ADD_ELEM_FAILURE 2

    int l_idx = 0;            // Used to index the for loops for vector create
    int l_rc = 0;             // Indicates failure to add a sensor to vector
    uint16_t l_gsid = 0xFFFF;
    errlHndl_t l_err = NULL;

    do
    {
        // Grab GSID for errl in case of failure
        l_gsid = l_sensor->gsid;

        // Vectorize the sensor
        sensor_vectorize(l_sensor, l_vector, l_op);

        // If vectorize worked, add elements to the vector sensor
        if(NULL != l_sensor->vector)
        {
            // Loop through cores
            for(l_idx = 0; l_idx < MAX_NUM_CORES; l_idx++)
            {
                // Add elements to the vector sensor
                sensor_vector_elem_add(l_sensor->vector,
                                       l_idx,
                                       AMECSENSOR_ARRAY_PTR(l_sensor_elem_array_gsid, l_idx));

                // If core is not present, disable this vector element
                if(!CORE_PRESENT(l_idx))
                {
                    sensor_vector_elem_enable(l_sensor->vector,
                                              l_idx,
                                              0 /* Disable */);
                }
            }

            // Sanity check: we should have MAX_NUM_CORES entries in the
            // vector sensor
            if(l_sensor->vector->size != MAX_NUM_CORES)
            {
                // Set l_rc and break out so that we can create an errl
                l_rc = VECTOR_ADD_ELEM_FAILURE;
                break;
            }
        }
        else
        {
            // Set l_rc and break out so that we can create an errl
            l_rc = VECTOR_CREATE_FAILURE;
            break;
        }
    } while(0);

    if(l_rc)
    {
        // If we failed to vectorize the sensor then there is a problem.
        TRAC_ERR("Failed to vectorize sensor[0x%x, 0x%x]", l_gsid, l_rc );

        /* @
         * @errortype
         * @moduleid    AMEC_VECTORIZE_FW_SENSORS
         * @reasoncode  SSX_GENERIC_FAILURE
         * @userdata1   return code
         * @userdata2   gsid of failed sensor
         * @userdata4   OCC_NO_EXTENDED_RC
         * @devdesc     Firmware failure in call to vectorize sensor
         */
        l_err = createErrl( AMEC_VECTORIZE_FW_SENSORS,  //modId
                            SSX_GENERIC_FAILURE,        //reasoncode
                            OCC_NO_EXTENDED_RC,         //Extended reason code
                            ERRL_SEV_UNRECOVERABLE,     //Severity
                            NULL, //TODO: create trace  //Trace Buf
                            DEFAULT_TRACE_SIZE,         //Trace Size
                            l_rc,                       //userdata1
                            l_gsid                      //userdata2
                          );

        REQUEST_RESET(l_err);
    }
}
// Verifies that each core is at the correct frequency after they have had
// time to stabilize
void amec_verify_pstate()
{
    uint8_t l_core = 0;
    int8_t l_pstate_from_fmax = 0;
    gpe_bulk_core_data_t * l_core_data_ptr;
    pmc_pmsr_ffcdc_data_t l_pmc_pmsr_ffdc;
    errlHndl_t l_err = NULL;

    if ( (G_time_until_freq_check == 0)
         && ( CURRENT_MODE() != OCC_MODE_DYN_POWER_SAVE )
         && ( CURRENT_MODE() != OCC_MODE_DYN_POWER_SAVE_FP )
         && (!G_sysConfigData.system_type.kvm))
    {
        // Reset the counter
        G_time_until_freq_check = FREQ_CHG_CHECK_TIME;

        // Convert fmax to the corresponding pstate
        l_pstate_from_fmax = proc_freq2pstate(g_amec->sys.fmax);

        for( l_core = 0; l_core < MAX_NUM_CORES; l_core++ )
        {
            // If the core isn't present, skip it
            if(!CORE_PRESENT(l_core))
            {
                l_pmc_pmsr_ffdc.pmsr_ffdc_data.data[l_core].value = 0;
                continue;
            }

            // Get pointer to core data
            l_core_data_ptr = proc_get_bulk_core_data_ptr(l_core);

            // Get the core's pmsr data
            l_pmc_pmsr_ffdc.pmsr_ffdc_data.data[l_core] = l_core_data_ptr->pcb_slave.pmsr;

            // Verify that the core is running at the correct frequency.
            // If not, log an error.
            if( (l_pstate_from_fmax != l_pmc_pmsr_ffdc.pmsr_ffdc_data.data[l_core].fields.local_pstate_actual)
                && (l_pstate_from_fmax > l_pmc_pmsr_ffdc.pmsr_ffdc_data.data[l_core].fields.pv_min)
                && (l_err == NULL) )
            {
                TRAC_ERR("Frequency mismatch in core %d: actual_ps[%d] req_ps[%d] fmax[%d] mode[%d].",
                         l_core,
                         l_pmc_pmsr_ffdc.pmsr_ffdc_data.data[l_core].fields.local_pstate_actual,
                         l_pstate_from_fmax,
                         g_amec->sys.fmax,
                         CURRENT_MODE());

                fill_pmc_ffdc_buffer(&l_pmc_pmsr_ffdc.pmc_ffcdc_data);

                /* @
                 * @moduleid    AMEC_VERIFY_FREQ_MID
                 * @reasonCode  TARGET_FREQ_FAILURE
                 * @severity    ERRL_SEV_UNRECOVERABLE
                 * @userdata1   0
                 * @userdata2   0
                 * @userdata4   OCC_NO_EXTENDED_RC
                 * @devdesc     A core is not running at the expected frequency
                 */
                l_err = createErrl( AMEC_VERIFY_FREQ_MID, // i_modId,
                                    TARGET_FREQ_FAILURE,  // i_reasonCode,
                                    OCC_NO_EXTENDED_RC,
                                    ERRL_SEV_UNRECOVERABLE,
                                    NULL,                 // i_trace,
                                    DEFAULT_TRACE_SIZE,   // i_traceSz,
                                    0,                    // i_userData1,
                                    0);                   // i_userData2

                // Add firmware callout
                addCalloutToErrl(l_err,
                                 ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                 ERRL_COMPONENT_ID_FIRMWARE,
                                 ERRL_CALLOUT_PRIORITY_HIGH);

                // Add processor callout
                addCalloutToErrl(l_err,
                                 ERRL_CALLOUT_TYPE_HUID,
                                 G_sysConfigData.proc_huid,
                                 ERRL_CALLOUT_PRIORITY_MED);
            }
        }

        if( l_err != NULL)
        {
            // Add our register dump to the error log
            addUsrDtlsToErrl(l_err,
                             (uint8_t*) &l_pmc_pmsr_ffdc,
                             sizeof(l_pmc_pmsr_ffdc),
                             ERRL_USR_DTL_STRUCT_VERSION_1,
                             ERRL_USR_DTL_BINARY_DATA);

            REQUEST_RESET(l_err);
        }
    }
}
// Function Specification
//
// Name: amec_slv_check_perf
//
// Description: Detect and log degraded performance errors on the Slave OCC.
//              This function will run every tick.
//
// Thread: RealTime Loop
//
// Task Flags:
//
// End Function Specification
void amec_slv_check_perf(void)
{
    /*------------------------------------------------------------------------*/
    /*  Local Variables                                                        */
    /*------------------------------------------------------------------------*/
    static BOOLEAN l_prev_failsafe_state = FALSE;
    static BOOLEAN l_prev_ovs_state = FALSE;
    static BOOLEAN l_prev_pcap_state = FALSE;
    static ERRL_SEVERITY l_pcap_sev = ERRL_SEV_PREDICTIVE;
    static BOOLEAN l_throttle_traced = FALSE;
    static uint64_t l_time = 0;

    /*------------------------------------------------------------------------*/
    /*  Code                                                                   */
    /*------------------------------------------------------------------------*/

    // Verify that cores are at proper frequency
    amec_verify_pstate();

    do
    {
        // Was frequency limited by power?
        if ( G_non_dps_power_limited != TRUE )
        {
            if(l_throttle_traced)
            {
                TRAC_INFO("Frequency not limited by power algorithms anymore");
                l_throttle_traced = FALSE;
            }
            // We are done; break and return
            break;
        }

        // Frequency limited due to failsafe condition?
        if ( AMEC_INTF_GET_FAILSAFE() == TRUE )
        {
            if ( l_prev_failsafe_state == TRUE)
            {
                // We are done; break and return
                break;
            }
            else
            {
                // Log this error ONLY ONCE per IPL
                l_prev_failsafe_state = TRUE;

                TRAC_ERR("Frequency limited due to failsafe condition(mode:%d, state:%d)",
                         CURRENT_MODE(), CURRENT_STATE());
                l_throttle_traced = TRUE;
                l_time = ssx_timebase_get();

                // Log error that calls out OVS procedure;
                // set error severity to ERRL_SEV_PREDICTIVE
                /* @
                 * @errortype
                 * @moduleid    AMEC_SLAVE_CHECK_PERFORMANCE
                 * @reasoncode  INTERNAL_FAILURE
                 * @userdata1   Previous FailSafe State
                 * @userdata4   ERC_AMEC_SLAVE_FAILSAFE_STATE
                 * @devdesc     Frequency limited due to failsafe condition
                 */
                errlHndl_t l_errl = createErrl(AMEC_SLAVE_CHECK_PERFORMANCE,  //modId
                                               INTERNAL_FAILURE,              //reasoncode
                                               ERC_AMEC_SLAVE_FAILSAFE_STATE, //Extended reason code
                                               ERRL_SEV_PREDICTIVE,           //Severity
                                               NULL,                          //Trace Buf
                                               DEFAULT_TRACE_SIZE,            //Trace Size
                                               l_prev_failsafe_state,         //userdata1
                                               0);                            //userdata2

                addCalloutToErrl(l_errl,
                                 ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                 ERRL_COMPONENT_ID_OVERSUBSCRIPTION,
                                 ERRL_CALLOUT_PRIORITY_HIGH);

                // And set the consolidate action flag
                setErrlActions( l_errl, ERRL_ACTIONS_CONSOLIDATE_ERRORS );

                // Commit Error
                commitErrl(&l_errl);

                // We are done; break
                break;
            }
        }

        // Frequency limited due to oversubscription condition?
        if ( AMEC_INTF_GET_OVERSUBSCRIPTION() == TRUE )
        {
            if ( l_prev_ovs_state == TRUE)
            {
                // We are done; break and return
                break;
            }
            else
            {
                // Log this error ONLY ONCE per IPL
                l_prev_ovs_state = TRUE;

                TRAC_ERR("Frequency limited due to oversubscription condition(mode:%d, state:%d)",
                         CURRENT_MODE(), CURRENT_STATE());
                l_throttle_traced = TRUE;
                l_time = ssx_timebase_get();

                // Log error that calls out OVS procedure;
                // set error severity to ERRL_SEV_PREDICTIVE
                // (the reason code matches the RC passed to createErrl() below)
                /* @
                 * @errortype
                 * @moduleid    AMEC_SLAVE_CHECK_PERFORMANCE
                 * @reasoncode  OVERSUB_LIMIT_ALERT
                 * @userdata1   Previous OVS State
                 * @userdata4   ERC_AMEC_SLAVE_OVS_STATE
                 * @devdesc     Frequency limited due to oversubscription condition
                 */
                errlHndl_t l_errl = createErrl(AMEC_SLAVE_CHECK_PERFORMANCE, //modId
                                               OVERSUB_LIMIT_ALERT,          //reasoncode
                                               ERC_AMEC_SLAVE_OVS_STATE,     //Extended reason code
                                               ERRL_SEV_PREDICTIVE,          //Severity
                                               NULL,                         //Trace Buf
                                               DEFAULT_TRACE_SIZE,           //Trace Size
                                               l_prev_ovs_state,             //userdata1
                                               0);                           //userdata2

                // Callout to Oversubscription
                addCalloutToErrl(l_errl,
                                 ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                 ERRL_COMPONENT_ID_OVERSUBSCRIPTION,
                                 ERRL_CALLOUT_PRIORITY_HIGH);

                // Callout to APSS
                addCalloutToErrl(l_errl,
                                 ERRL_CALLOUT_TYPE_HUID,
                                 G_sysConfigData.apss_huid,
                                 ERRL_CALLOUT_PRIORITY_MED);

                // Callout to Firmware
                addCalloutToErrl(l_errl,
                                 ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                 ERRL_COMPONENT_ID_FIRMWARE,
                                 ERRL_CALLOUT_PRIORITY_LOW);

                // And set the consolidate action flag
                setErrlActions( l_errl, ERRL_ACTIONS_CONSOLIDATE_ERRORS );

                // Commit Error
                commitErrl(&l_errl);

                // We are done; break
                break;
            }
        }

        uint16_t l_snrBulkPwr = AMECSENSOR_PTR(PWR250US)->sample;

        // Frequency limited due to system power cap condition?
        if (( l_snrBulkPwr > (G_sysConfigData.pcap.system_pcap - PDROP_THRESH) )
            && ( G_sysConfigData.pcap.current_pcap == 0 ))
        {
            if ( l_prev_pcap_state == TRUE)
            {
                // We are done; break and return
                break;
            }
            else
            {
                // Log this error ONLY ONCE per IPL
                l_prev_pcap_state = TRUE;

                TRAC_ERR("Frequency limited due to power cap condition(mode:%d, state:%d)",
                         CURRENT_MODE(), CURRENT_STATE());
                TRAC_ERR("SnrBulkPwr %d > Sys Pcap %d ",
                         l_snrBulkPwr, G_sysConfigData.pcap.system_pcap );
                TRAC_ERR("SnrFanPwr %d, SnrIOPwr %d, SnrStoragePwr %d, SnrGpuPwr %d ",
                         AMECSENSOR_PTR(PWR250USFAN)->sample,
                         AMECSENSOR_PTR(PWR250USIO)->sample,
                         AMECSENSOR_PTR(PWR250USSTORE)->sample,
                         AMECSENSOR_PTR(PWR250USGPU)->sample );
                TRAC_ERR("SnrProcPwr 0 %d, SnrProcPwr 1 %d, SnrProcPwr 2 %d, SnrProcPwr 3 %d",
                         g_amec->proc_snr_pwr[0],
                         g_amec->proc_snr_pwr[1],
                         g_amec->proc_snr_pwr[2],
                         g_amec->proc_snr_pwr[3] );
                TRAC_ERR("SnrMemPwr 0 %d, SnrMemPwr 1 %d, SnrMemPwr 2 %d, SnrMemPwr 3 %d",
                         g_amec->mem_snr_pwr[0],
                         g_amec->mem_snr_pwr[1],
                         g_amec->mem_snr_pwr[2],
                         g_amec->mem_snr_pwr[3] );
                l_throttle_traced = TRUE;
                l_time = ssx_timebase_get();

                // Log error that calls out firmware and APSS procedure;
                // set error severity to l_pcap_sev
                /* @
                 * @errortype
                 * @moduleid    AMEC_SLAVE_CHECK_PERFORMANCE
                 * @reasoncode  PCAP_THROTTLE_POWER_LIMIT
                 * @userdata1   Current Sensor Bulk Power
                 * @userdata2   System PCAP
                 * @userdata4   ERC_AMEC_SLAVE_POWERCAP
                 * @devdesc     Frequency limited due to PowerCap condition
                 */
                errlHndl_t l_errl = createErrl(AMEC_SLAVE_CHECK_PERFORMANCE,      //modId
                                               PCAP_THROTTLE_POWER_LIMIT,         //reasoncode
                                               ERC_AMEC_SLAVE_POWERCAP,           //Extended reason code
                                               l_pcap_sev,                        //Severity
                                               NULL,                              //Trace Buf
                                               DEFAULT_TRACE_SIZE,                //Trace Size
                                               l_snrBulkPwr,                      //userdata1
                                               G_sysConfigData.pcap.system_pcap); //userdata2

                addCalloutToErrl(l_errl,
                                 ERRL_CALLOUT_TYPE_COMPONENT_ID,
                                 ERRL_COMPONENT_ID_FIRMWARE,
                                 ERRL_CALLOUT_PRIORITY_HIGH);

                addCalloutToErrl(l_errl,
                                 ERRL_CALLOUT_TYPE_HUID,
                                 G_sysConfigData.apss_huid,
                                 ERRL_CALLOUT_PRIORITY_HIGH);

                // And set the consolidate action flag
                setErrlActions( l_errl, ERRL_ACTIONS_CONSOLIDATE_ERRORS );

                // Then drop l_pcap_sev to informational for any later occurrence
                l_pcap_sev = ERRL_SEV_INFORMATIONAL;

                // Commit Error
                commitErrl(&l_errl);

                // We are done; break
                break;
            }
        }

        // Throttle this trace to once every 3600 seconds (1 hr = 3,600,000 ms)
        if(!l_throttle_traced && ( DURATION_IN_MS_UNTIL_NOW_FROM(l_time) > 3600000 ) )
        {
            TRAC_INFO("Frequency power limited due to transient condition: "
                      "PowerLimited=%x, FailSafe=%x, OverSubScription=%x CurrentBulkPwr=%x",
                      G_non_dps_power_limited, AMEC_INTF_GET_FAILSAFE(),
                      AMEC_INTF_GET_OVERSUBSCRIPTION(), l_snrBulkPwr );
            l_throttle_traced = TRUE;
            l_time = ssx_timebase_get();
        }
    } while( 0 );

    return;
}
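// ----------------------------------------------------------------------
// Sketch (not part of the original source): all three branches of
// amec_slv_check_perf() use the same "log once per IPL" idiom via the
// static flags l_prev_failsafe_state, l_prev_ovs_state and
// l_prev_pcap_state. Reduced to its core with generic, hypothetical names:
// ----------------------------------------------------------------------
void log_condition_once_sketch(BOOLEAN i_condition_active)
{
    static BOOLEAN L_logged = FALSE;

    if (i_condition_active && !L_logged)
    {
        L_logged = TRUE;  // latch: this condition is logged only once per IPL
        // trace, createErrl(), addCalloutToErrl(), setErrlActions() and
        // commitErrl() would go here
    }
}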
// Function Specification
//
// Name: amec_update_vrm_sensors
//
// Description: Updates sensors that use data from the VRMs
//              (e.g., VR_FAN, FANS_FULL_SPEED, VR_HOT).
//
// Thread: RealTime Loop
//
// End Function Specification
void amec_update_vrm_sensors(void)
{
    /*------------------------------------------------------------------------*/
    /*  Local Variables                                                        */
    /*------------------------------------------------------------------------*/
    int l_rc = 0;
    int l_vrfan = 0;
    int l_softoc = 0;
    int l_minus_np1_regmode = 0;
    int l_minus_n_regmode = 0;
    static uint8_t L_error_count = 0;
    uint8_t l_pin = 0;
    uint8_t l_pin_value = 1; // active low, so set default to high
    uint8_t l_vrhot_count = 0;
    errlHndl_t l_err = NULL;

    /*------------------------------------------------------------------------*/
    /*  Code                                                                   */
    /*------------------------------------------------------------------------*/

    // Check if we have access to SPIVID. In DCMs only the Master OCC has
    // access to the SPIVID.
    if (G_dcm_occ_role == OCC_DCM_MASTER)
    {
        // VR_FAN and SOFT_OC come from SPIVID
        l_rc = vrm_read_state(SPIVRM_PORT(0),
                              &l_minus_np1_regmode,
                              &l_minus_n_regmode,
                              &l_vrfan,
                              &l_softoc);
        if (l_rc == 0)
        {
            // Update the VR_FAN sensor
            sensor_update( AMECSENSOR_PTR(VRFAN250USPROC), (uint16_t)l_vrfan );

            // Clear our error count and the 'read failure' flag (since we can
            // read the VR_FAN signal)
            L_error_count = 0;
            G_thrm_fru_data[DATA_FRU_VRM].read_failure = 0;

            // Obtain the 'fan_full_speed' GPIO from APSS
            l_pin = G_sysConfigData.apss_gpio_map.fans_full_speed;

            // No longer reading gpio from APSS in GA1 due to instability in
            // APSS composite mode
            //apss_gpio_get(l_pin, &l_pin_value);

            // VR_HOT sensor is a counter of the number of times the VRHOT
            // signal has been asserted
            l_vrhot_count = AMECSENSOR_PTR(VRHOT250USPROC)->sample;

            // Check if VR_FAN is asserted AND if 'fans_full_speed' GPIO is ON.
            // Note that this GPIO is active low.
            if (AMECSENSOR_PTR(VRFAN250USPROC)->sample && !(l_pin_value))
            {
                // VR_FAN is asserted and 'fans_full_speed' GPIO is ON,
                // so increment our VR_HOT counter
                if (l_vrhot_count < g_amec->vrhotproc.setpoint)
                {
                    l_vrhot_count++;
                }
            }
            else
            {
                // Reset our VR_HOT counter
                l_vrhot_count = 0;
            }
            sensor_update(AMECSENSOR_PTR(VRHOT250USPROC), l_vrhot_count);
        }
        else
        {
            // Increment our error count
            L_error_count++;

            // Don't allow the error count to wrap
            if (L_error_count == 0)
            {
                L_error_count = 0xFF;
            }

            // Log an error if we reached the allowed number of consecutive
            // failed reads of the VR_FAN signal
            if ((L_error_count == g_amec->proc[0].vrfan_error_count) &&
                (g_amec->proc[0].vrfan_error_count != 0xFF))
            {
                TRAC_ERR("amec_update_vrm_sensors: Failed to read VR_FAN for %u consecutive times!",
                         L_error_count);

                // Also, inform the thermal thread to send a cooling request
                G_thrm_fru_data[DATA_FRU_VRM].read_failure = 1;

                /* @
                 * @errortype
                 * @moduleid    AMEC_HEALTH_CHECK_VRFAN_TIMEOUT
                 * @reasoncode  VRM_VRFAN_TIMEOUT
                 * @userdata1   timeout value
                 * @userdata2   0
                 * @userdata4   OCC_NO_EXTENDED_RC
                 * @devdesc     Failed to read VR_FAN signal from regulator.
                 */
                l_err = createErrl(AMEC_HEALTH_CHECK_VRFAN_TIMEOUT,  //modId
                                   VRM_VRFAN_TIMEOUT,                //reasoncode
                                   OCC_NO_EXTENDED_RC,               //Extended reason code
                                   ERRL_SEV_PREDICTIVE,              //Severity
                                   NULL,                             //Trace Buf
                                   DEFAULT_TRACE_SIZE,               //Trace Size
                                   g_amec->thermaldimm.temp_timeout, //userdata1
                                   0);                               //userdata2

                // Callout backplane for this VRM error
                addCalloutToErrl(l_err,
                                 ERRL_CALLOUT_TYPE_HUID,
                                 G_sysConfigData.backplane_huid,
                                 ERRL_CALLOUT_PRIORITY_MED);

                // Commit the error
                commitErrl(&l_err);
            }
        }
    }

    // Memory VR sensors are not read here; update them to zero
    if( 1 )
    {
        sensor_update( AMECSENSOR_PTR(VRFAN250USMEM), 0 );
        sensor_update( AMECSENSOR_PTR(VRHOT250USMEM), 0 );
    }
}
////////////////////////// // Function Specification // // Name: amec_pcap_calc // // Description: Calculate the node, memory and processor power caps. // // Thread: Real Time Loop // // End Function Specification void amec_pcap_calc(const bool i_oversub_state) { bool l_active_pcap_changed = FALSE; uint16_t l_node_pwr = AMECSENSOR_PTR(PWRSYS)->sample; uint16_t l_p0_pwr = AMECSENSOR_PTR(PWRPROC)->sample; int32_t l_avail_power = 0; uint16_t mem_pwr_diff = 0; uint32_t l_proc_fraction = 0; static uint32_t L_prev_node_pcap = 0; static bool L_apss_error_traced = FALSE; static uint32_t L_ticks_mem_pwr_available = 0; static bool L_trace_pcap_throttle = true; static bool L_trace_pcap_unthrottle = true; // Determine the active power cap. // when in oversub (N mode) only use oversub pcap if lower than user set pcap // OCC should allow N mode to be higher than N+1 (don't compare against norm_node_pcap) // N mode may be higher on some systems due to ps issue reporting higher power in N mode if( (TRUE == i_oversub_state) && (g_amec->pcap.ovs_node_pcap < G_sysConfigData.pcap.current_pcap) ) { g_amec->pcap.active_node_pcap = g_amec->pcap.ovs_node_pcap; } // norm_node_pcap is set as lowest between sys (N+1 mode) and // user in amec_data_write_pcap() else { g_amec->pcap.active_node_pcap = g_amec->pcap.norm_node_pcap; } //Trace whenever the node pcap changes if(L_prev_node_pcap != g_amec->pcap.active_node_pcap) { TRAC_IMP("amec_pcap_calc: Node pcap set to %d watts.", g_amec->pcap.active_node_pcap); L_prev_node_pcap = g_amec->pcap.active_node_pcap; // set this pcap as valid (needed by master for comparison) g_amec->pcap_valid = 1; l_active_pcap_changed = TRUE; } l_avail_power = g_amec->pcap.active_node_pcap - l_node_pwr; // Determine GPU power cap if there are GPUs present if(G_first_proc_gpu_config) { amec_gpu_pcap(i_oversub_state, l_active_pcap_changed, l_avail_power); } if(l_node_pwr != 0) { l_proc_fraction = ((uint32_t)(l_p0_pwr) << 16)/l_node_pwr; if(L_apss_error_traced) { TRAC_ERR("PCAP: PWRSYS sensor is no longer 0."); L_apss_error_traced = FALSE; } // check if allowed to increase power AND memory throttled due to pcap if((l_avail_power > 0) && (g_amec->pcap.active_mem_level != 0)) { // un-throttle memory if there is enough available power between // current and new throttles if (CURRENT_MODE() == OCC_MODE_NOMINAL) { mem_pwr_diff = g_amec->pcap.nominal_mem_pwr; } else { mem_pwr_diff = g_amec->pcap.turbo_mem_pwr; } // currently there's only 1 mem pcap throt level so must be pcap1 mem_pwr_diff -= g_amec->pcap.pcap1_mem_pwr; if(l_avail_power >= mem_pwr_diff) { L_ticks_mem_pwr_available++; if( L_ticks_mem_pwr_available == UNTHROTTLE_MEMORY_DELAY ) { if( L_trace_pcap_unthrottle || (G_allow_trace_flags & ALLOW_MEM_TRACE) ) { TRAC_IMP("PCAP: Un-Throttling memory"); L_trace_pcap_unthrottle = false; } g_amec->pcap.active_mem_level = 0; L_ticks_mem_pwr_available = 0; // don't let the proc have any available power this tick l_avail_power = 0; } } } // check if need to reduce power and frequency is already at the min else if(l_avail_power < 0) { L_ticks_mem_pwr_available = 0; // if memory is not throttled and frequency is at min shed additional power // by throttling memory if( (g_amec->pcap.active_mem_level == 0) && (g_amec->proc[0].pwr_votes.ppb_fmax == g_amec->sys.fmin) ) { if( L_trace_pcap_throttle || (G_allow_trace_flags & ALLOW_MEM_TRACE) ) { TRAC_IMP("PCAP: Throttling memory"); L_trace_pcap_throttle = false; } g_amec->pcap.active_mem_level = 1; } } else { // no changes to memory throttles due to power } } 
else { if(!L_apss_error_traced) { TRAC_ERR("PCAP: PWRSYS sensor is showing a value of 0."); L_apss_error_traced = TRUE; } } // skip processor changes until memory is un-capped if(!g_amec->pcap.active_mem_level) { g_amec->pcap.active_proc_pcap = l_p0_pwr + ((l_proc_fraction * l_avail_power) >> 16); //NOTE: Power capping will not affect nominal cores unless a customer pcap // is set below the max pcap or oversubscription occurs. However, // nominal cores will drop below nominal if ppb_fmax drops below nominal if(g_amec->pcap.active_node_pcap < G_sysConfigData.pcap.max_pcap) { g_amec->proc[0].pwr_votes.nom_pcap_fmin = G_sysConfigData.sys_mode_freq.table[OCC_MODE_MIN_FREQUENCY]; } else { g_amec->proc[0].pwr_votes.nom_pcap_fmin = G_sysConfigData.sys_mode_freq.table[OCC_MODE_NOMINAL]; } }
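/*
 * Illustrative worked example (not from the OCC source; numbers assumed):
 * the processor's share of available node power is apportioned with a 16.16
 * fixed-point fraction so the division happens only once per tick. Assuming
 * a node reading of 2000W with 800W on P0 and 100W of headroom under the
 * active pcap:
 *
 *   l_proc_fraction  = (800 << 16) / 2000   = 26214   (~0.4 in 16.16)
 *   proc share       = (26214 * 100) >> 16  = 39 W    (truncates toward zero)
 *   active_proc_pcap = 800 + 39             = 839 W
 *
 * Truncation always rounds the processor's share down, so the apportioned
 * power never exceeds the available headroom.
 */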
// Function Specification
//
// Name: querySensorList
//
// Description: Query sensor list
//
// End Function Specification
errlHndl_t querySensorList(const querySensorListArg_t * i_argPtr)
{
    errlHndl_t l_err = NULL;

    /* TEMP -- NOT SUPPORTED ( NEED AMEC/DCOM ) */
#if 0
    if (i_argPtr != NULL)
    {
        uint16_t i_startGsid = i_argPtr->i_startGsid;
        uint8_t i_present = i_argPtr->i_present;
        uint16_t i_type = i_argPtr->i_type;
        uint16_t i_loc = i_argPtr->i_loc;
        uint16_t * io_numOfSensors = i_argPtr->io_numOfSensors;
        sensorQueryList_t * o_sensors = i_argPtr->o_sensors;
        sensor_info_t * o_sensorInfoPtrs = i_argPtr->o_sensorInfoPtrs;

        // Validate input parameters
        if( (i_startGsid >= NUMBER_OF_SENSORS_IN_LIST) ||
            ((o_sensors == NULL) && (o_sensorInfoPtrs == NULL)) ||
            (io_numOfSensors == NULL))
        {
            TRAC_ERR("querySensorList: Invalid input pointers OR start GSID is out of range: "
                     "i_startGsid: 0x%x, G_amec_sensor_count: 0x%x",
                     i_startGsid, G_amec_sensor_count);
            /* @
             * @errortype
             * @moduleid    SENSOR_QUERY_LIST
             * @reasoncode  INTERNAL_INVALID_INPUT_DATA
             * @userdata1   i_startGsid -- passed in Global Sensor ID
             * @userdata2   G_amec_sensor_count -- number of OCC sensors
             * @userdata4   OCC_NO_EXTENDED_RC
             * @devdesc     Firmware failure due to invalid GSID passed
             */
            /* @
             * @errortype
             * @moduleid    SENSOR_QUERY_LIST
             * @reasoncode  INTERNAL_FAILURE
             * @userdata1   i_startGsid -- passed in Global Sensor ID
             * @userdata2   G_amec_sensor_count -- number of OCC sensors
             * @userdata4   OCC_NO_EXTENDED_RC
             * @devdesc     NULL pointer passed for querySensorList output args
             */
            l_err = createErrl(SENSOR_QUERY_LIST,                 //modId
                               ((i_startGsid >= NUMBER_OF_SENSORS_IN_LIST) ?
                                INTERNAL_INVALID_INPUT_DATA :
                                INTERNAL_FAILURE),                //reasoncode
                               OCC_NO_EXTENDED_RC,                //Extended reason code
                               ERRL_SEV_PREDICTIVE,               //Severity
                               NULL,                              //Trace Buf
                               0,                                 //Trace Size
                               i_startGsid,                       //userdata1
                               G_amec_sensor_count                //userdata2
                               );
        }
        else
        {
            uint32_t l_cnt = i_startGsid;
            uint32_t l_num = *io_numOfSensors;
            *io_numOfSensors = 0;

            // Traverse the sensor list starting at i_startGsid to find
            // matching sensors. Return them in the output variables.
            for (; (l_cnt < NUMBER_OF_SENSORS_IN_LIST && ((*io_numOfSensors) < l_num)); l_cnt++)
            {
                // A non-zero sample value means the sensor is present.
                // This is currently only used for debug/mfg purposes.
                // If the user wants only present sensors and the sample is
                // zero, don't include the current sensor in the query list.
                if ((i_present) && (G_amec_sensor_list[l_cnt]->sample == 0))
                {
                    continue;
                }

                // If the input type mask does not include the current
                // sensor's type, don't include the sensor in the query list.
                if ((i_type & G_sensor_info[l_cnt].sensor.type) == 0)
                {
                    continue;
                }

                // If the input location mask does not include the current
                // sensor's location, don't include the sensor in the query list.
                if ((i_loc & G_sensor_info[l_cnt].sensor.location) == 0)
                {
                    continue;
                }

                if (o_sensors != NULL)
                {
                    // All conditions match.
                    // Include the current sensor in the query list:
                    // copy gsid, name and sample
                    o_sensors->gsid = l_cnt;
                    strncpy(o_sensors->name, G_sensor_info[l_cnt].name, MAX_SENSOR_NAME_SZ);
                    o_sensors->sample = G_amec_sensor_list[l_cnt]->sample;
                    o_sensors++;
                }

                if (o_sensorInfoPtrs != NULL)
                {
                    memcpy(o_sensorInfoPtrs, &G_sensor_info[l_cnt], sizeof(sensor_info_t));
                    o_sensorInfoPtrs++;
                }
                (*io_numOfSensors)++;
            }
        }
    }
    else
    {
        TRAC_ERR("querySensorList: Invalid argument pointer = NULL");
        /* @
         * @errortype
         * @moduleid    SENSOR_QUERY_LIST
         * @reasoncode  INTERNAL_INVALID_INPUT_DATA
         * @userdata1   NULL
         * @userdata2   NULL
         * @userdata4   ERC_ARG_POINTER_FAILURE
         * @devdesc     NULL pointer passed to querySensorList applet
         */
        l_err = createErrl( SENSOR_QUERY_LIST,              // Module ID
                            INTERNAL_INVALID_INPUT_DATA,    // Reason Code
                            ERC_ARG_POINTER_FAILURE,        // Extended reason code
                            ERRL_SEV_PREDICTIVE,            // Severity
                            NULL,                           // Trace
                            0,                              // Trace Size
                            0,                              // UserData 1
                            0                               // UserData 2
                            );
    }
#endif
    return l_err;
}
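/*
 * Illustrative usage sketch (an assumption, not in the OCC source): i_type
 * and i_loc are bit masks ANDed against each sensor's attributes, so a caller
 * can combine filters. The mask name AMEC_SENSOR_TYPE_TEMP below is
 * hypothetical; only the *_ALL masks are confirmed by the surrounding code.
 */
#if 0  // sketch only -- querySensorList() itself is compiled out above
static errlHndl_t query_present_temp_sensors(sensorQueryList_t *o_list,
                                             uint16_t *io_count)
{
    const querySensorListArg_t l_arg = {
        0,                     // i_startGsid: begin at the first GSID
        1,                     // i_present: only sensors with non-zero samples
        AMEC_SENSOR_TYPE_TEMP, // i_type: hypothetical temperature mask
        AMEC_SENSOR_LOC_ALL,   // i_loc: any location
        io_count,              // io_numOfSensors: in = capacity, out = actual
        o_list,                // o_sensors: summary records
        NULL                   // o_sensorInfoPtrs: detailed info not needed
    };
    return querySensorList(&l_arg);
}
#endif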
// Function Specification
//
// Name: cmdh_mnfg_list_sensors
//
// Description: Returns a list of selected sensors
//
// End Function Specification
uint8_t cmdh_mnfg_list_sensors(const cmdh_fsp_cmd_t * i_cmd_ptr,
                               cmdh_fsp_rsp_t * o_rsp_ptr)
{
    uint8_t l_rc = ERRL_RC_SUCCESS;
    uint16_t l_type = 0;
    uint16_t l_location = 0;
    uint16_t l_start_gsid;
    uint16_t i = 0;
    uint16_t l_resp_data_length = 0;
    uint16_t l_datalength;
    // Allow one extra entry so truncation can be detected below
    uint16_t l_num_of_sensors = MFG_MAX_NUM_SENSORS + 1;
    cmdh_mfg_list_sensors_query_t *l_cmd_ptr = (cmdh_mfg_list_sensors_query_t*) i_cmd_ptr;
    cmdh_mfg_list_sensors_resp_t *l_resp_ptr = (cmdh_mfg_list_sensors_resp_t*) o_rsp_ptr;
    sensorQueryList_t l_sensor_list[MFG_MAX_NUM_SENSORS + 1];
    errlHndl_t l_err = NULL;

    do
    {
        // Do sanity check on the function inputs
        if ((NULL == i_cmd_ptr) || (NULL == o_rsp_ptr))
        {
            TRAC_ERR("cmdh_mnfg_list_sensors: invalid pointers. cmd[0x%08x] rsp[0x%08x]",
                     (uint32_t) i_cmd_ptr, (uint32_t) o_rsp_ptr);
            l_rc = ERRL_RC_INTERNAL_FAIL;
            break;
        }

        // Check packet data length
        l_datalength = CMDH_DATALEN_FIELD_UINT16(i_cmd_ptr);
        if(l_datalength < (sizeof(cmdh_mfg_list_sensors_query_t) -
                           sizeof(cmdh_fsp_cmd_header_t)))
        {
            TRAC_ERR("cmdh_mnfg_list_sensors: incorrect data length. exp[%d] act[%d]",
                     (sizeof(cmdh_mfg_list_sensors_query_t) -
                      sizeof(cmdh_fsp_cmd_header_t)),
                     l_datalength);
            l_rc = ERRL_RC_INVALID_CMD_LEN;
            break;
        }

        // Check version
        if(l_cmd_ptr->version != MFG_LIST_SENSOR_VERSION)
        {
            TRAC_ERR("cmdh_mnfg_list_sensors: incorrect version. exp[%d] act[%d]",
                     MFG_LIST_SENSOR_VERSION, l_cmd_ptr->version);
            l_rc = ERRL_RC_INVALID_DATA;
            break;
        }

        // Capture user inputs
        l_type = l_cmd_ptr->type;
        l_location = l_cmd_ptr->location;
        l_start_gsid = l_cmd_ptr->start_gsid;
        TRAC_INFO("cmdh_mnfg_list_sensors: Type[0x%04x] Location[0x%04x]",
                  l_type, l_location);

        // Initialize the sensor query arguments
        const querySensorListArg_t l_qsl_arg = {
            l_start_gsid,        // i_startGsid - passed by the caller
            l_cmd_ptr->present,  // i_present - passed by the caller
            l_type,              // i_type - passed by the caller
            l_location,          // i_loc - passed by the caller
            &l_num_of_sensors,   // io_numOfSensors
            l_sensor_list,       // o_sensors
            NULL                 // o_sensorInfoPtr - not needed
        };

        // Get the list of sensors
        l_err = querySensorList(&l_qsl_arg);
        if (NULL != l_err)
        {
            // Query failure
            TRAC_ERR("cmdh_mnfg_list_sensors: Failed to query sensor list. Error status is: 0x%x",
                     l_err->iv_reasonCode);

            // Commit error log
            commitErrl(&l_err);
            l_rc = ERRL_RC_INTERNAL_FAIL;
            break;
        }
        else
        {
            TRAC_INFO("cmdh_mnfg_list_sensors: Number of sensors found[%u]",
                      l_num_of_sensors);

            if (l_num_of_sensors > MFG_MAX_NUM_SENSORS)
            {
                // Got too many sensors back, need to truncate the list
                TRAC_INFO("cmdh_mnfg_list_sensors: Got too many sensors back[%u]. "
                          "Truncating number of sensors to %u",
                          l_num_of_sensors, MFG_MAX_NUM_SENSORS);
                l_num_of_sensors = MFG_MAX_NUM_SENSORS;
                l_resp_ptr->truncated = 1;
            }
            else
            {
                l_resp_ptr->truncated = 0;
            }

            // Clear out the sensor fields
            memset((void*) &(l_resp_ptr->sensor[0]), 0,
                   (sizeof(cmdh_dbug_sensor_list_t) * l_num_of_sensors));

            // Populate the response data packet
            l_resp_ptr->num_sensors = l_num_of_sensors;
            for (i=0; i<l_num_of_sensors; i++)
            {
                l_resp_ptr->sensor[i].gsid = l_sensor_list[i].gsid;
                l_resp_ptr->sensor[i].sample = l_sensor_list[i].sample;
                strcpy(l_resp_ptr->sensor[i].name, l_sensor_list[i].name);
            }
        }
    } while(0);

    // Populate the response data header: 2 bytes of fixed fields plus one
    // record per returned sensor
    l_resp_data_length = 2 + l_num_of_sensors * sizeof(cmdh_mfg_sensor_rec_t);
    G_rsp_status = l_rc;
    o_rsp_ptr->data_length[0] = ((uint8_t *)&l_resp_data_length)[0];
    o_rsp_ptr->data_length[1] = ((uint8_t *)&l_resp_data_length)[1];

    return l_rc;
}
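/*
 * Illustrative note (a sketch, not in the OCC source): the 16-bit response
 * length above is stored byte-by-byte through a uint8_t view, so data_length[]
 * takes the native byte order of the processor. On the big-endian PPC405 that
 * OCC runs on, this matches the endian-explicit form:
 *
 *   o_rsp_ptr->data_length[0] = (uint8_t)(l_resp_data_length >> 8);   // MSB
 *   o_rsp_ptr->data_length[1] = (uint8_t)(l_resp_data_length & 0xFF); // LSB
 */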
int traceFuncTest()
{
    UINT l_rc = 0;
    UINT l_max_trace_entries = TRACE_BUFFER_SIZE / MIN_TRACE_ENTRY_SIZE;
    UINT l_entry_count = 0;
    UINT l_buffer_size = 0;
    tracDesc_t l_head = NULL;

    do
    {
        // Test target - trac_write_XXX(), TRAC_get_buffer() and TRAC_get_td()
        // This test case creates l_max_trace_entries + 1 trace entries to fill
        // the trace buffer, so times_wrap should end up larger than zero
        do
        {
            l_entry_count++;
            TRAC_INFO("traceTest applet INFO record: count %d", (int)l_entry_count);
            TRAC_ERR("traceTest applet ERR record: count %d", (int)l_entry_count);
            TRAC_IMP("traceTest applet IMP record: count %d", (int)l_entry_count);
        } while(l_max_trace_entries >= l_entry_count);

        // Check times_wrap in TRAC_INFO.
        // Because the structures are all the same, skip TRAC_ERR and TRAC_IMP
        l_rc = TRAC_get_buffer(TRAC_get_td("INF"), G_trac_buffer);
        l_head = (tracDesc_t)&G_trac_buffer;
        if((l_rc != 0 ) || (l_head->times_wrap == 0))
        {
            printf("Fail: times_wrap error in trace buffer: %d, %d\n",
                   l_rc, l_head->times_wrap);
            break;
        }

        // Test target - TRAC_get_buffer() and TRAC_get_td()
        // case: invalid parameters
        l_rc = TRAC_get_buffer(TRAC_get_td("INF"), NULL);
        l_head = (tracDesc_t)&G_trac_buffer;
        if(l_rc == 0)
        {
            printf("TRAC_get_buffer(), reason code: %d\n", l_rc);
            printf("Fail: test TRAC_get_buffer() invalid 2nd parameter\n");
            break;
        }

        l_rc = TRAC_get_buffer(NULL, G_trac_buffer);
        l_head = (tracDesc_t)&G_trac_buffer;
        if(l_rc == 0)
        {
            printf("TRAC_get_buffer(), reason code: %d\n", l_rc);
            printf("Fail: test TRAC_get_buffer() invalid 1st parameter\n");
            break;
        }

        // Test target - TRAC_get_buffer_partial() and TRAC_get_td()
        // case: invalid parameters
        l_buffer_size = TRACE_BUFFER_SIZE;
        l_rc = TRAC_get_buffer_partial(NULL, G_trac_buffer, &l_buffer_size);
        if((l_rc != TRAC_INVALID_PARM) && (l_buffer_size != 0))
        {
            printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc);
            printf("Fail: test TRAC_get_buffer_partial() invalid 1st parameter\n");
            break;
        }

        l_rc = TRAC_get_buffer_partial(TRAC_get_td("UNKNOWN"), NULL, &l_buffer_size);
        if((l_rc != TRAC_INVALID_PARM) && (l_buffer_size != 0))
        {
            printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc);
            printf("Fail: test TRAC_get_buffer_partial() with unknown trace descriptor\n");
            break;
        }

        l_rc = TRAC_get_buffer_partial(TRAC_get_td("INF"), NULL, &l_buffer_size);
        if((l_rc != TRAC_INVALID_PARM) && (l_buffer_size != 0))
        {
            printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc);
            printf("Fail: test TRAC_get_buffer_partial() invalid 2nd parameter\n");
            break;
        }

        l_rc = TRAC_get_buffer_partial(TRAC_get_td("ERR"), G_trac_buffer, NULL);
        if(l_rc != TRAC_INVALID_PARM)
        {
            printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc);
            printf("Fail: test TRAC_get_buffer_partial() invalid 3rd parameter\n");
            break;
        }

        // Test target - TRAC_get_buffer_partial()
        // case: input buffer smaller than the trace buffer header
        l_buffer_size = sizeof(trace_buf_head_t) - 1;
        l_rc = TRAC_get_buffer_partial(TRAC_get_td("IMP"), G_trac_buffer, &l_buffer_size);
        if(l_rc != TRAC_DATA_SIZE_LESS_THAN_HEADER_SIZE)
        {
            printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc);
            printf("Fail: test TRAC_get_buffer_partial() with illegally small input buffer\n");
            break;
        }

        // Test target - TRAC_get_buffer_partial()
        // case: input buffer is smaller than the trace buffer
        l_buffer_size = sizeof(trace_buf_head_t) + (TRACE_BUFFER_SIZE/4);
        l_rc = TRAC_get_buffer_partial(TRAC_get_td("INF"), G_trac_buffer, &l_buffer_size);
        if(l_rc)
        {
            printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc);
            printf("Fail: test TRAC_get_buffer_partial() with small "
input buffer\n"); break; } // Test target - TRAC_get_buffer_partial() // case: input buffer is larger then trace buffer l_buffer_size = sizeof(G_trac_buffer); l_rc = TRAC_get_buffer_partial(TRAC_get_td("INF"), G_trac_buffer, &l_buffer_size); if(l_rc || (l_buffer_size != TRACE_BUFFER_SIZE)) { printf("TRAC_get_buffer_partial(), reason code: %d size %d/%d\n", l_rc, l_buffer_size, TRACE_BUFFER_SIZE); printf("Fail: test TRAC_get_buffer_partial() with too large input buffer\n"); break; } // Test target - TRAC_reset_buf() and TRAC_get_buffer_partial() // case: clear trace buffer and check with buffer larger than trace buffer TRAC_reset_buf(); l_buffer_size = sizeof(G_trac_buffer); l_rc = TRAC_get_buffer_partial(TRAC_get_td("ERR"), G_trac_buffer, &l_buffer_size); if(l_rc) { printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc); printf("Fail: test TRAC_reset_buf()/TRAC_get_buffer_partial() with empty trace\n"); break; } // Test target - TRAC_reset_buf() and TRAC_get_buffer_partial() // case: clear trace buffer and check it with buffer smaller than trace buffer TRAC_reset_buf(); l_buffer_size = TRACE_BUFFER_SIZE/2; l_rc = TRAC_get_buffer_partial(TRAC_get_td("ERR"), G_trac_buffer, &l_buffer_size); if(l_rc) { printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc); printf("Fail: test TRAC_reset_buf()/TRAC_get_buffer_partial() with empty trace\n"); break; } // Test target - TRAC_get_buffer_partial() // case: create some traces and test with large input buffer l_entry_count = 0; do{ l_entry_count++; TRAC_INFO("traceTest applet INFO record: count %d", (int)l_entry_count); TRAC_ERR("traceTest applet ERR record: count %d", (int)l_entry_count); TRAC_IMP("traceTest applet IMP record: count %d", (int)l_entry_count); }while((l_max_trace_entries/4) >= l_entry_count); l_buffer_size = TRACE_BUFFER_SIZE; l_rc = TRAC_get_buffer_partial(TRAC_get_td("IMP"), G_trac_buffer, &l_buffer_size); l_head = (tracDesc_t)&G_trac_buffer; if(l_rc || (l_head->times_wrap != 0)) { printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc); printf("Fail: test TRAC_get_buffer_partial() with large input buffer\n"); break; } // Test target - TRAC_get_buffer_partial() // case: create some traces and test with small input buffer l_buffer_size = sizeof(trace_buf_head_t) + (TRACE_BUFFER_SIZE/4); l_rc = TRAC_get_buffer_partial(TRAC_get_td("INF"), G_trac_buffer, &l_buffer_size); if(l_rc) { printf("TRAC_get_buffer_partial(), reason code: %d\n", l_rc); printf("Fail: test TRAC_get_buffer_partial() with small input buffer\n"); break; } }while(0); return l_rc; }
// Function Specification
//
// Name: cmdh_mnfg_get_sensor
//
// Description: Returns detailed information for a single selected sensor
//
// End Function Specification
uint8_t cmdh_mnfg_get_sensor(const cmdh_fsp_cmd_t * i_cmd_ptr,
                             cmdh_fsp_rsp_t * o_rsp_ptr)
{
    uint8_t l_rc = ERRL_RC_SUCCESS;
    uint16_t l_gsid;
    uint16_t l_resp_data_length = 0;
    uint16_t l_datalength;
    uint16_t l_num_of_sensors = 1;
    cmdh_mfg_get_sensor_query_t *l_cmd_ptr = (cmdh_mfg_get_sensor_query_t*) i_cmd_ptr;
    cmdh_mfg_get_sensor_resp_t *l_resp_ptr = (cmdh_mfg_get_sensor_resp_t*) o_rsp_ptr;
    sensor_info_t l_sensor_info;
    errlHndl_t l_err = NULL;
    sensor_t* l_sensor_ptr;

    do
    {
        // Do sanity check on the function inputs
        if ((NULL == i_cmd_ptr) || (NULL == o_rsp_ptr))
        {
            TRAC_ERR("cmdh_mnfg_get_sensor: invalid pointers. cmd[0x%08x] rsp[0x%08x]",
                     (uint32_t) i_cmd_ptr, (uint32_t) o_rsp_ptr);
            l_rc = ERRL_RC_INTERNAL_FAIL;
            break;
        }

        // Check packet data length
        l_datalength = CMDH_DATALEN_FIELD_UINT16(i_cmd_ptr);
        if(l_datalength < (sizeof(cmdh_mfg_get_sensor_query_t) -
                           sizeof(cmdh_fsp_cmd_header_t)))
        {
            TRAC_ERR("cmdh_mnfg_get_sensor: incorrect data length. exp[%d] act[%d]",
                     (sizeof(cmdh_mfg_get_sensor_query_t) -
                      sizeof(cmdh_fsp_cmd_header_t)),
                     l_datalength);
            l_rc = ERRL_RC_INVALID_CMD_LEN;
            break;
        }

        // Check version (compare against the same constant that is traced)
        if(l_cmd_ptr->version != MFG_GET_SENSOR_VERSION)
        {
            TRAC_ERR("cmdh_mnfg_get_sensor: incorrect version. exp[%d] act[%d]",
                     MFG_GET_SENSOR_VERSION, l_cmd_ptr->version);
            l_rc = ERRL_RC_INVALID_DATA;
            break;
        }

        // Capture user inputs
        l_gsid = l_cmd_ptr->gsid;
        TRAC_INFO("cmdh_mnfg_get_sensor: gsid[0x%04x]", l_gsid);

        // Initialize the sensor query arguments
        querySensorListArg_t l_qsl_arg = {
            l_gsid,                 // i_startGsid - passed by the caller
            0,                      // i_present
            AMEC_SENSOR_TYPE_ALL,   // i_type
            AMEC_SENSOR_LOC_ALL,    // i_loc
            &l_num_of_sensors,      // io_numOfSensors
            NULL,                   // o_sensors - not needed
            &l_sensor_info          // o_sensorInfoPtr
        };

        // Get the sensor list
        l_err = querySensorList(&l_qsl_arg);
        if (NULL != l_err)
        {
            // Query failure
            TRAC_ERR("cmdh_mnfg_get_sensor: Failed to get sensor list. Error status is: 0x%x",
                     l_err->iv_reasonCode);

            // Commit error log
            commitErrl(&l_err);
            l_rc = ERRL_RC_INTERNAL_FAIL;
            break;
        }
        else
        {
            l_resp_ptr->gsid = l_gsid;

            // Some of the response comes from the sensor
            l_sensor_ptr = getSensorByGsid(l_gsid);
            if (l_sensor_ptr == NULL)
            {
                TRAC_INFO("cmdh_mnfg_get_sensor: Didn't find sensor with gsid[0x%.4X]. "
                          "Min/Max values won't be accurate.", l_gsid);
                l_resp_ptr->sample = 0;
                l_resp_ptr->min = 0xFFFF;
                l_resp_ptr->max = 0;
                l_resp_ptr->accumulator = 0;
                l_resp_ptr->status = 0;
            }
            else
            {
                l_resp_ptr->sample = l_sensor_ptr->sample;
                l_resp_ptr->min = l_sensor_ptr->sample_min;
                l_resp_ptr->max = l_sensor_ptr->sample_max;
                // Truncate accumulator to 4 bytes (should not be used)
                l_resp_ptr->accumulator = (uint32_t)l_sensor_ptr->accumulator;
                l_resp_ptr->status = *(uint8_t*)(&l_sensor_ptr->status);
            }

            // The rest of the response comes from the sensor info
            memcpy(l_resp_ptr->name, l_sensor_info.name, sizeof(l_resp_ptr->name));
            memcpy(l_resp_ptr->units, l_sensor_info.sensor.units, sizeof(l_resp_ptr->units));
            l_resp_ptr->freq = l_sensor_info.sensor.freq;
            l_resp_ptr->scalefactor = l_sensor_info.sensor.scalefactor;
            l_resp_ptr->location = l_sensor_info.sensor.location;
            l_resp_ptr->type = l_sensor_info.sensor.type;
        }
    } while(0);

    // Populate the response data header
    l_resp_data_length = sizeof(cmdh_mfg_get_sensor_resp_t) - sizeof(cmdh_fsp_rsp_header_t);
    G_rsp_status = l_rc;
    o_rsp_ptr->data_length[0] = ((uint8_t *)&l_resp_data_length)[0];
    o_rsp_ptr->data_length[1] = ((uint8_t *)&l_resp_data_length)[1];

    return l_rc;
}
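/*
 * Illustrative caller-side sketch (an assumption, not in the OCC source):
 * only the version and gsid fields of cmdh_mfg_get_sensor_query_t are
 * confirmed by the parsing code above; the helper name is hypothetical.
 */
#if 0
static void build_get_sensor_query(cmdh_mfg_get_sensor_query_t *o_query,
                                   uint16_t i_gsid)
{
    o_query->version = MFG_GET_SENSOR_VERSION; // must match the parser's check
    o_query->gsid    = i_gsid;                 // Global Sensor ID to look up
}
#endif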
void task_centaur_control( task_t * i_task )
{
    errlHndl_t l_err = NULL;    // Error handler
    int rc = 0;                 // Return code
    uint32_t l_cent;
    amec_centaur_t *l_cent_ptr = NULL;
    static uint8_t L_scom_timeout[MAX_NUM_CENTAURS] = {0}; //track # of consecutive failures
    static bool L_gpe_scheduled = FALSE;
    static uint8_t L_gpe_fail_logged = 0;
    static bool L_gpe_idle_traced = FALSE;
    static bool L_gpe_had_1_tick = FALSE;

    // Pointer to the task data structure
    centaur_control_task_t * l_centControlTask =
        (centaur_control_task_t *) i_task->data_ptr;

    // Pointer to parameter field for GPE request
    GpeScomParms * l_parms =
        (GpeScomParms *)(l_centControlTask->gpe_req.parameter);

    do
    {
        l_cent = l_centControlTask->curCentaur;
        l_cent_ptr = &g_amec->proc[0].memctl[l_cent].centaur;

        // First, check to see if the previous GPE request is still running.
        // A request is considered idle if it is not attached to any of the
        // asynchronous request queues.
        if( !(async_request_is_idle(&l_centControlTask->gpe_req.request)) )
        {
            L_scom_timeout[l_cent]++;
            // This can happen due to variability in when the task runs
            if(!L_gpe_idle_traced && L_gpe_had_1_tick)
            {
                TRAC_INFO("task_centaur_control: GPE is still running. cent[%d]", l_cent);
                l_centControlTask->traceThresholdFlags |= CENTAUR_CONTROL_GPE_STILL_RUNNING;
                L_gpe_idle_traced = TRUE;
            }
            L_gpe_had_1_tick = TRUE;
            break;
        }
        else
        {
            // Request is idle
            L_gpe_had_1_tick = FALSE;
            if(L_gpe_idle_traced)
            {
                TRAC_INFO("task_centaur_control: GPE completed. cent[%d]", l_cent);
                L_gpe_idle_traced = FALSE;
            }
        }

        // Check scom status
        if(L_gpe_scheduled)
        {
            if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc)
            {
                if(!(L_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent)))
                {
                    // Check if the centaur has a channel checkstop. If it does,
                    // then do not log any errors. We also don't want to throttle
                    // a centaur that is in this condition.
                    if(!(cent_chan_checkstop(l_cent)))
                    {
                        L_gpe_fail_logged |= CENTAUR0_PRESENT_MASK >> l_cent;
                        TRAC_ERR("task_centaur_control: gpe_scom_centaur failed. l_cent=%d rc=%x, index=0x%08x",
                                 l_cent, l_parms->rc, l_parms->errorIndex);

                        /* @
                         * @errortype
                         * @moduleid    CENT_TASK_CONTROL_MOD
                         * @reasoncode  CENT_SCOM_ERROR
                         * @userdata1   rc - Return code of scom operation
                         * @userdata2   index of scom operation that failed
                         * @userdata4   OCC_NO_EXTENDED_RC
                         * @devdesc     OCC access to centaur failed
                         */
                        l_err = createErrl(
                            CENT_TASK_CONTROL_MOD,  // modId
                            CENT_SCOM_ERROR,        // reasoncode
                            OCC_NO_EXTENDED_RC,     // Extended reason code
                            ERRL_SEV_PREDICTIVE,    // Severity
                            NULL,                   // Trace Buf
                            DEFAULT_TRACE_SIZE,     // Trace Size
                            l_parms->rc,            // userdata1
                            l_parms->errorIndex     // userdata2
                            );

                        addUsrDtlsToErrl(l_err,                                          //io_err
                                         (uint8_t *) &(l_centControlTask->gpe_req.ffdc), //i_dataPtr,
                                         sizeof(PoreFfdc),                               //i_size
                                         ERRL_USR_DTL_STRUCT_VERSION_1,                  //version
                                         ERRL_USR_DTL_BINARY_DATA);                      //type

                        // Callout the centaur
                        addCalloutToErrl(l_err,
                                         ERRL_CALLOUT_TYPE_HUID,
                                         G_sysConfigData.centaur_huids[l_cent],
                                         ERRL_CALLOUT_PRIORITY_MED);

                        // Callout the processor
                        addCalloutToErrl(l_err,
                                         ERRL_CALLOUT_TYPE_HUID,
                                         G_sysConfigData.proc_huid,
                                         ERRL_CALLOUT_PRIORITY_MED);

                        commitErrl(&l_err);
                    }
                } //if(!(L_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent)))

                // Request failed. Keep count of failures and request a reset
                // if we reach the max retry count.
                L_scom_timeout[l_cent]++;
                if(L_scom_timeout[l_cent] == CENTAUR_CONTROL_SCOM_TIMEOUT)
                {
                    break;
                }
            } //if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc)
            else
            {
                // Request completed successfully. Reset the timeout.
                L_scom_timeout[l_cent] = 0;
            }
        } //if(L_gpe_scheduled)
// Function Specification // // Name: cmdh_mnfg_run_stop_slew // // Description: This function handles the manufacturing command to start // or stop frequency autoslewing. // // End Function Specification uint8_t cmdh_mnfg_run_stop_slew(const cmdh_fsp_cmd_t * i_cmd_ptr, cmdh_fsp_rsp_t * o_rsp_ptr) { uint8_t l_rc = ERRL_RC_SUCCESS; uint16_t l_fmin = 0; uint16_t l_fmax = 0; uint16_t l_step_size = 0; uint16_t l_step_delay = 0; uint32_t l_temp = 0; mnfg_run_stop_slew_cmd_t *l_cmd_ptr = (mnfg_run_stop_slew_cmd_t*) i_cmd_ptr; mnfg_run_stop_slew_rsp_t *l_rsp_ptr = (mnfg_run_stop_slew_rsp_t*) o_rsp_ptr; do { // This command is only supported on Master OCC if (G_occ_role == OCC_SLAVE) { TRAC_ERR("cmdh_mnfg_run_stop_slew: Mnfg command not supported on Slave OCCs!"); break; } // Do some basic input verification if ((l_cmd_ptr->action > MNFG_INTF_SLEW_STOP) || (l_cmd_ptr->step_mode > MNFG_INTF_FULL_SLEW)) { // Invalid values were passed by the user! TRAC_ERR("cmdh_mnfg_run_stop_slew: Invalid values were detected! action[0x%02x] step_mode[0x%02x]", l_cmd_ptr->action, l_cmd_ptr->step_mode); l_rc = ERRL_RC_INVALID_DATA; break; } // Are we stopping the auto-slew function? if (l_cmd_ptr->action == MNFG_INTF_SLEW_STOP) { // Collect the slew count l_rsp_ptr->slew_count = AMEC_MST_CUR_SLEW_COUNT(); // Collect the frequency range used for the auto-slew l_rsp_ptr->fstart = AMEC_MST_CUR_MNFG_FMIN(); l_rsp_ptr->fstop = AMEC_MST_CUR_MNFG_FMAX(); TRAC_INFO("cmdh_mnfg_run_stop_slew: Auto-slewing has been stopped. Count[%u] fstart[%u] fstop[%u]", AMEC_MST_CUR_SLEW_COUNT(), AMEC_MST_CUR_MNFG_FMIN(), AMEC_MST_CUR_MNFG_FMAX()); // Send a signal to RTL to stop auto-slewing AMEC_MST_STOP_AUTO_SLEW(); // We are done break; } // If we made it here, that means we are starting up a slew run // First, determine the Fmax and Fmin for the slew run if (l_cmd_ptr->bottom_mode == OCC_MODE_PWRSAVE) { // If bottom mode is Static Power Save, use the min frequency // available l_fmin = G_sysConfigData.sys_mode_freq.table[OCC_MODE_MIN_FREQUENCY]; } else { l_fmin = G_sysConfigData.sys_mode_freq.table[l_cmd_ptr->bottom_mode]; } l_fmax = G_sysConfigData.sys_mode_freq.table[l_cmd_ptr->high_mode]; // Add the percentages to compute the min/max frequencies l_fmin = l_fmin + (l_fmin * l_cmd_ptr->bottom_percent)/100; l_fmax = l_fmax + (l_fmax * l_cmd_ptr->high_percent)/100; TRAC_INFO("cmdh_mnfg_run_stop_slew: We are about to start auto-slewing function"); TRAC_INFO("cmdh_mnfg_run_stop_slew: bottom_mode[0x%.2X] freq[%u] high_mode[0x%.2X] freq[%u]", l_cmd_ptr->bottom_mode, l_fmin, l_cmd_ptr->high_mode, l_fmax); // Determine the frequency step size and the step delay if (l_cmd_ptr->step_mode == MNFG_INTF_FULL_SLEW) { l_step_size = l_fmax - l_fmin; // Disable step delays if full slew mode has been selected l_step_delay = 0; TRAC_INFO("cmdh_mnfg_run_stop_slew: Enabling full-slew mode with step_size[%u] step_delay[%u]", l_step_size, l_step_delay); } else { l_step_size = (uint16_t)G_mhz_per_pstate; // Translate the step delay to internal OCC ticks l_temp = (l_cmd_ptr->step_delay * 1000) / AMEC_US_PER_TICK; l_step_delay = (uint16_t) l_temp; TRAC_INFO("cmdh_mnfg_run_stop_slew: Enabling single-step mode with step_size[%u] step_delay[%u]", l_step_size, l_step_delay); } // Now, load the values for RTL consumption AMEC_MST_SET_MNFG_FMIN(l_fmin); AMEC_MST_SET_MNFG_FMAX(l_fmax); AMEC_MST_SET_MNFG_FSTEP(l_step_size); AMEC_MST_SET_MNFG_DELAY(l_step_delay); // Reset the slew-counter before we start auto-slewing AMEC_MST_CUR_SLEW_COUNT() = 0; // Wait a little bit 
        // for RTL to process the above parameters
        ssx_sleep(SSX_MILLISECONDS(5));

        // Send a signal to RTL to start auto-slewing
        AMEC_MST_START_AUTO_SLEW();

        // We are auto-slewing now; populate the response packet
        l_rsp_ptr->slew_count = 0;
        l_rsp_ptr->fstart = l_fmin;
        l_rsp_ptr->fstop = l_fmax;

    } while(0);

    // Populate the response data packet
    G_rsp_status = l_rc;
    l_rsp_ptr->data_length[0] = 0;
    l_rsp_ptr->data_length[1] = MNFG_INTF_RUN_STOP_SLEW_RSP_SIZE;

    return l_rc;
}
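/*
 * Illustrative worked example (assumes AMEC_US_PER_TICK = 250, i.e. a 250us
 * real-time loop; the actual value comes from the build): a requested
 * step_delay of 2 (in milliseconds) becomes
 *
 *   (2 * 1000) / 250 = 8 OCC ticks
 *
 * between frequency steps. A step_delay shorter than one tick truncates to
 * 0 ticks, which behaves like full-speed stepping.
 */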
//////////////////////////
// Function Specification
//
// Name: amec_gpu_pcap
//
// Description: Determine power cap for GPUs
//
// Thread: Real Time Loop
//
// End Function Specification
void amec_gpu_pcap(bool i_oversubscription, bool i_active_pcap_changed, int32_t i_avail_power)
{
    /*------------------------------------------------------------------------*/
    /*  Local Variables                                                        */
    /*------------------------------------------------------------------------*/
    uint8_t i = 0;
    uint32_t l_gpu_cap_mw = 0;
    uint16_t l_system_gpu_total_pcap = 0;  // Total GPU pcap required by the system,
                                           // based on whether currently in oversubscription
    static uint16_t L_total_gpu_pcap = 0;                // Current total GPU pcap in effect
    static uint16_t L_n_plus_1_mode_gpu_total_pcap = 0;  // Total GPU pcap required for N+1 (not in oversubscription)
    static uint16_t L_n_mode_gpu_total_pcap = 0;         // Total GPU pcap required for oversubscription
    static uint16_t L_active_psr_gpu_total_pcap = 0;     // Total GPU pcap for the currently set pcap and PSR
    static uint16_t L_per_gpu_pcap = 0;   // Amount of L_total_gpu_pcap for each GPU
    static uint8_t  L_psr = 100;          // PSR value used in L_active_psr_gpu_total_pcap calculation
    static bool     L_first_run = TRUE;   // For calculations done only one time
    static uint32_t L_last_pcap_traced[MAX_NUM_GPU_PER_DOMAIN] = {0};

    /*------------------------------------------------------------------------*/
    /*  Code                                                                   */
    /*------------------------------------------------------------------------*/

    // If this is the first time running, calculate the total GPU power cap
    // for the system power caps (N and N+1)
    if(L_first_run)
    {
        // Calculate total GPU power cap for oversubscription
        if(g_amec->pcap.ovs_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
        {
            // Take all non-GPU power away from the oversubscription power cap
            L_n_mode_gpu_total_pcap = g_amec->pcap.ovs_node_pcap -
                                      G_sysConfigData.total_non_gpu_max_pwr_watts;
            // Add back in the power that will be dropped by processor DVFS and
            // memory throttling, and give it to the GPUs
            L_n_mode_gpu_total_pcap += G_sysConfigData.total_proc_mem_pwr_drop_watts;
        }
        else
        {
            // This should not happen; the total non-GPU power should never be
            // higher than the N mode cap. Log an error and set the GPUs to
            // their minimum power cap.
            L_n_mode_gpu_total_pcap = 0;  // this will set minimum GPU power cap
            TRAC_ERR("amec_gpu_pcap: non GPU max power %dW is more than N mode pwr limit %dW",
                     G_sysConfigData.total_non_gpu_max_pwr_watts,
                     g_amec->pcap.ovs_node_pcap);
            /* @
             * @errortype
             * @moduleid    AMEC_GPU_PCAP_MID
             * @reasoncode  GPU_FAILURE
             * @userdata1   N mode Power Cap watts
             * @userdata2   Total non-GPU power watts
             * @userdata4   ERC_GPU_N_MODE_PCAP_CALC_FAILURE
             * @devdesc     Total non-GPU power more than N mode power cap
             */
            errlHndl_t l_err = createErrl(AMEC_GPU_PCAP_MID,
                                          GPU_FAILURE,
                                          ERC_GPU_N_MODE_PCAP_CALC_FAILURE,
                                          ERRL_SEV_PREDICTIVE,
                                          NULL,
                                          DEFAULT_TRACE_SIZE,
                                          g_amec->pcap.ovs_node_pcap,
                                          G_sysConfigData.total_non_gpu_max_pwr_watts);

            // Callout firmware
            addCalloutToErrl(l_err,
                             ERRL_CALLOUT_TYPE_COMPONENT_ID,
                             ERRL_COMPONENT_ID_FIRMWARE,
                             ERRL_CALLOUT_PRIORITY_HIGH);

            commitErrl(&l_err);
        }

        // Calculate total GPU power cap for N+1 (not in oversubscription)
        if(G_sysConfigData.pcap.system_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
        {
            // Take all non-GPU power away from the N+1 power cap
            L_n_plus_1_mode_gpu_total_pcap = G_sysConfigData.pcap.system_pcap -
                                             G_sysConfigData.total_non_gpu_max_pwr_watts;
            // Add back in the power that will be dropped by processor DVFS and
            // memory throttling, and give it to the GPUs
            L_n_plus_1_mode_gpu_total_pcap +=
                G_sysConfigData.total_proc_mem_pwr_drop_watts;
        }
        else
        {
            // This should not happen; the total non-GPU power should never be
            // higher than the N+1 mode cap. Log an error and set the GPUs to
            // their minimum power cap.
            L_n_plus_1_mode_gpu_total_pcap = 0;  // this will set minimum GPU power cap
            TRAC_ERR("amec_gpu_pcap: non GPU max power %dW is more than N+1 mode pwr limit %dW",
                     G_sysConfigData.total_non_gpu_max_pwr_watts,
                     G_sysConfigData.pcap.system_pcap);
            /* @
             * @errortype
             * @moduleid    AMEC_GPU_PCAP_MID
             * @reasoncode  GPU_FAILURE
             * @userdata1   N+1 mode Power Cap watts
             * @userdata2   Total non-GPU power watts
             * @userdata4   ERC_GPU_N_PLUS_1_MODE_PCAP_CALC_FAILURE
             * @devdesc     Total non-GPU power more than N+1 mode power cap
             */
            errlHndl_t l_err = createErrl(AMEC_GPU_PCAP_MID,
                                          GPU_FAILURE,
                                          ERC_GPU_N_PLUS_1_MODE_PCAP_CALC_FAILURE,
                                          ERRL_SEV_PREDICTIVE,
                                          NULL,
                                          DEFAULT_TRACE_SIZE,
                                          G_sysConfigData.pcap.system_pcap,
                                          G_sysConfigData.total_non_gpu_max_pwr_watts);

            // Callout firmware
            addCalloutToErrl(l_err,
                             ERRL_CALLOUT_TYPE_COMPONENT_ID,
                             ERRL_COMPONENT_ID_FIRMWARE,
                             ERRL_CALLOUT_PRIORITY_HIGH);

            commitErrl(&l_err);
        }
    } // if first run

    // Calculate the total GPU power cap for the current active limit and PSR.
    // This only needs to be calculated if either the active limit or the PSR changed.
    if( (L_first_run) ||
        (i_active_pcap_changed) ||
        (L_psr != G_sysConfigData.psr) )
    {
        L_psr = G_sysConfigData.psr;

        if(g_amec->pcap.active_node_pcap > G_sysConfigData.total_non_gpu_max_pwr_watts)
        {
            // Take all non-GPU power away from the active power cap
            L_active_psr_gpu_total_pcap = g_amec->pcap.active_node_pcap -
                                          G_sysConfigData.total_non_gpu_max_pwr_watts;
            // Add back in the power that will be dropped by processor DVFS and
            // memory throttling, scaled by the PSR, and give it to the GPUs.
            // Multiply before dividing so the integer math does not truncate
            // the PSR percentage to zero for any PSR below 100.
            L_active_psr_gpu_total_pcap +=
                ( ((uint32_t)L_psr * G_sysConfigData.total_proc_mem_pwr_drop_watts) / 100 );
        }
        else
        {
            // Set GPUs to minimum power cap
            L_active_psr_gpu_total_pcap = 0;
            TRAC_IMP("amec_gpu_pcap: non GPU max power %dW is more than active pwr limit %dW",
                     G_sysConfigData.total_non_gpu_max_pwr_watts,
                     g_amec->pcap.active_node_pcap);
        }

        // The total GPU power cap is the lower of the system cap (N+1 or
        // oversubscription, depending on whether we are in oversubscription)
        // and the active power limit. We do not need to always account for
        // oversubscription, since the automatic HW power brake will assert to
        // the GPUs if a problem occurs in the window between oversubscription
        // entry and the time OCC can set (and the GPUs can react to) a new
        // power limit.
        if(i_oversubscription)
        {
            // System is in oversubscription; use the N mode cap
            l_system_gpu_total_pcap = L_n_mode_gpu_total_pcap;
        }
        else
        {
            // System is not in oversubscription; use the N+1 mode cap
            l_system_gpu_total_pcap = L_n_plus_1_mode_gpu_total_pcap;
        }

        L_total_gpu_pcap = (l_system_gpu_total_pcap < L_active_psr_gpu_total_pcap) ?
                            l_system_gpu_total_pcap : L_active_psr_gpu_total_pcap;

        // Divide the total equally across all GPUs in the system
        if(G_first_num_gpus_sys)
        {
            L_per_gpu_pcap = L_total_gpu_pcap / G_first_num_gpus_sys;
        }
        else
        {
            L_per_gpu_pcap = 0;
            TRAC_ERR("amec_gpu_pcap: Called with no GPUs present!");
        }
    }

    // Set up to send the new power limit to the GPUs.
    // The actual sending of the GPU power limit is handled by task_gpu_sm().
    for (i=0; i<MAX_NUM_GPU_PER_DOMAIN; i++)
    {
        // Before sending a power limit to a GPU, its own limits must have been
        // read from the GPU so the min/max the GPU allows are known
        if( GPU_PRESENT(i) && g_amec->gpu[i].pcap.pwr_limits_read )
        {
            l_gpu_cap_mw = L_per_gpu_pcap * 1000;  // convert W to mW

            // GPU is present and we have its min/max power limits;
            // clip the GPU power limit to the min/max GPU limit if needed
            if(l_gpu_cap_mw < g_amec->gpu[i].pcap.gpu_min_pcap_mw)       // clip to min?
            {
                l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_min_pcap_mw;
            }
            else if(l_gpu_cap_mw > g_amec->gpu[i].pcap.gpu_max_pcap_mw)  // clip to max?
            {
                l_gpu_cap_mw = g_amec->gpu[i].pcap.gpu_max_pcap_mw;
            }

            // Check if this is a new power limit
            if(g_amec->gpu[i].pcap.gpu_desired_pcap_mw != l_gpu_cap_mw)
            {
                if( (g_amec->gpu[i].pcap.gpu_desired_pcap_mw != 0) ||
                    (L_last_pcap_traced[i] != l_gpu_cap_mw) )
                {
                    L_last_pcap_traced[i] = l_gpu_cap_mw;
                    TRAC_IMP("amec_gpu_pcap: Updating GPU%d desired pcap %dmW to %dmW",
                             i, g_amec->gpu[i].pcap.gpu_desired_pcap_mw, l_gpu_cap_mw);
                }

                g_amec->gpu[i].pcap.gpu_desired_pcap_mw = l_gpu_cap_mw;
            }
        }
    } // for each GPU

    L_first_run = FALSE;
}
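/*
 * Illustrative worked example (values assumed, not from the OCC source):
 * with L_total_gpu_pcap = 900W and G_first_num_gpus_sys = 4,
 *
 *   L_per_gpu_pcap = 900 / 4 = 225 W  ->  l_gpu_cap_mw = 225000 mW
 *
 * A GPU advertising a 250000..300000 mW cap range would then be clipped up
 * to its 250000 mW floor, so the sum of the per-GPU caps can exceed
 * L_total_gpu_pcap when the equal share falls below a GPU's minimum.
 */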
// Function Specification
//
// Name: reset_state_request
//
// Description: Request Reset States
//
// End Function Specification
void reset_state_request(uint8_t i_request)
{
    //TODO: This needs to be changed so that G_reset_state operations are
    //      atomic.
    switch(i_request)
    {
        case RESET_REQUESTED_DUE_TO_ERROR:
            // In case we want to just halt() if fw requests a reset, this is
            // the place to do it. It is disabled by default, and there is no
            // code to enable it.
            if( G_halt_on_reset_request )
            {
                TRAC_ERR("Halt()");

                // This isn't modeled very well in Simics. OCC will go into an
                // infinite loop, which eventually would crash Simics.
                HALT_WITH_FIR_SET;
            }

            // If we have TMGT comm, and we aren't already in reset, set the
            // reset state to enter the reset state machine.
            if(G_reset_state < RESET_REQUESTED_DUE_TO_ERROR)
            {
                TRAC_IMP("Activating reset required state.");

                G_reset_state = RESET_REQUESTED_DUE_TO_ERROR;

                // Post the semaphore to wake up the thread that
                // will put us into SAFE state.
                ssx_semaphore_post(&G_dcomThreadWakeupSem);

                // Set RTL Flags here too, depending on how urgent it is that
                // we stop running tasks.
                rtl_set_run_mask(RTL_FLAG_RST_REQ);
            }
            break;

        case NOMINAL_REQUESTED_DUE_TO_ERROR:
            if(G_reset_state < NOMINAL_REQUESTED_DUE_TO_ERROR)
            {
                TRAC_ERR("Going to Nominal because of error");

                // May need to add a counter if multiple places request nominal
                G_reset_state = NOMINAL_REQUESTED_DUE_TO_ERROR;

                //TODO: Will need to set some flag or event here
            }
            break;

        case RESET_NOT_REQUESTED:
            if(G_reset_state == NOMINAL_REQUESTED_DUE_TO_ERROR)
            {
                TRAC_IMP("Clearing Nominal Reset State because of error");

                // May need to add a counter check if multiple places request nominal
                G_reset_state = RESET_NOT_REQUESTED;

                //TODO: Will need to clear some flag or event here
            }
            break;

        default:
            break;
    }
}
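/*
 * Illustrative sketch for the atomicity TODO above (an assumption, not the
 * author's fix): wrapping the read-modify-write of G_reset_state in an SSX
 * critical section would serialize it against interrupt-level callers.
 * SsxMachineContext and ssx_critical_section_enter/exit are the SSX
 * primitives used elsewhere in OCC firmware; their use here is a sketch only.
 */
#if 0
void reset_state_request_atomic(uint8_t i_request)
{
    SsxMachineContext l_ctx;

    // Disable non-critical interrupts around the state transition
    ssx_critical_section_enter(SSX_NONCRITICAL, &l_ctx);

    reset_state_request(i_request);

    ssx_critical_section_exit(&l_ctx);
}
#endif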