// Collects bulk core data for the core currently assigned to this RTL task.
// When the previous GPE data-collection job has completed, swaps the task's
// double-buffered core_data_ptr with the global per-core buffer, publishes the
// "data updated" bit for AMEC, and applies the HW280375 EMPATH error-flag
// workaround (trace a bounded number of times, log the unexpected error once).
// NOTE(review): this chunk appears truncated - the do/while close, function
// close, and any GPE re-schedule code are not visible here.
void task_core_data( task_t * i_task )
{
    errlHndl_t l_err = NULL;        //Error handler
    tracDesc_t l_trace = NULL;      //Temporary trace descriptor
    int rc = 0;                     //return code
    bulk_core_data_task_t * l_bulk_core_data_ptr =
        (bulk_core_data_task_t *)i_task->data_ptr;
    GpeGetCoreDataParms * l_parms =
        (GpeGetCoreDataParms *)(l_bulk_core_data_ptr->gpe_req.parameter);
    gpe_bulk_core_data_t * l_temp = NULL;

    do
    {
        //First, check to see if the previous GPE request still running
        //A request is considered idle if it is not attached to any of the
        //asynchronous request queues
        if( !(async_request_is_idle(&l_bulk_core_data_ptr->gpe_req.request)) )
        {
            //This should not happen unless there's a problem
            //Trace 1 time
            if( !G_queue_not_idle_traced )
            {
                TRAC_ERR("Core data GPE is still running \n");
                G_queue_not_idle_traced = TRUE;
            }
            break;
        }

        //Need to complete collecting data for all assigned cores from previous interval
        //and tick 0 is the current tick before collect data again.
        if( (l_bulk_core_data_ptr->current_core == l_bulk_core_data_ptr->end_core)
            &&
            ((CURRENT_TICK & (MAX_NUM_TICKS - 1)) != 0) )
        {
            PROC_DBG("Not collect data. Need to wait for tick.\n");
            break;
        }

        //Check to see if the previously GPE request has successfully completed
        //A request is not considered complete until both the engine job
        //has finished without error and any callback has run to completion.
        if( async_request_completed(&l_bulk_core_data_ptr->gpe_req.request)
            && CORE_PRESENT(l_bulk_core_data_ptr->current_core) )
        {
            //If the previous GPE request succeeded then swap core_data_ptr
            //with the global one. The gpe routine will write new data into
            //a buffer that is not being accessed by the RTLoop code.
            PROC_DBG( "Swap core_data_ptr [%x] with the global one\n",
                      l_bulk_core_data_ptr->current_core );

            //debug only
#ifdef PROC_DEBUG
            print_core_status(l_bulk_core_data_ptr->current_core);
            print_core_data_sensors(l_bulk_core_data_ptr->current_core);
#endif
            // Double-buffer swap: task-local buffer <-> global per-core buffer,
            // so the GPE never writes into the buffer AMEC is reading.
            l_temp = l_bulk_core_data_ptr->core_data_ptr;
            l_bulk_core_data_ptr->core_data_ptr =
                G_core_data_ptrs[l_bulk_core_data_ptr->current_core];
            G_core_data_ptrs[l_bulk_core_data_ptr->current_core] = l_temp;

            //Core data has been collected so set the bit in global mask.
            //AMEC code will know which cores to update sensors for. AMEC is
            //responsible for clearing the bit later on.
            G_updated_core_mask |=
                CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core);

            // Presumptively clear the empath error mask
            G_empath_error_core_mask &=
                ~(CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core));

            // The gpe_data collection code has to handle the workaround for
            // HW280375. Two new flags have been added to the OHA_RO_STATUS_REG
            // image to indicate whether the EMPATH collection failed, and
            // whether it was due to an "expected" error that we can ignore
            // (we can ignore the data as well), or an "unexpected" error that
            // we will create an informational log one time.
            //
            // The "expected" errors are very rare in practice, in fact we may
            // never even see them unless running a specific type of workload.
            // If you want to test the handling of expected errors compile the
            // GPE code with -DINJECT_HW280375_ERRORS which will inject an error
            // approximately every 1024 samples
            //
            // To determine if the expected error has occurred inspect the
            // CoreDataOha element of the CoreData structure written by the GPE
            // core data job. The OHA element contains the oha_ro_status_reg.
            // Inside the OHA status register is a 16 bit reserved field.
            // gpe_data.h defines two masks that can be applied against the
            // reserved field to check for these errors:
            //     CORE_DATA_EXPECTED_EMPATH_ERROR
            //     CORE_DATA_UNEXPECTED_EMPATH_ERROR
            // Also, a 4-bit PCB parity + error code is saved at bit position:
            // CORE_DATA_EMPATH_ERROR_LOCATION, formally the length is
            // specified by: CORE_DATA_EMPATH_ERROR_BITS
            gpe_bulk_core_data_t *l_core_data =
                G_core_data_ptrs[l_bulk_core_data_ptr->current_core];

            // We will trace the errors, but only a certain number of
            // times, we will only log the unexpected error once.
#define OCC_EMPATH_ERROR_THRESH 10
            static uint32_t L_expected_emp_err_cnt = 0;
            static uint32_t L_unexpected_emp_err_cnt = 0;

            // Check the reserved field for the expected or the unexpected error flag
            if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0
                 & CORE_DATA_EXPECTED_EMPATH_ERROR)
                ||
                (l_core_data->oha.oha_ro_status_reg.fields._reserved0
                 & CORE_DATA_UNEXPECTED_EMPATH_ERROR))
            {
                // Indicate empath error on current core
                G_empath_error_core_mask |=
                    CORE0_PRESENT_MASK >> (l_bulk_core_data_ptr->current_core);

                // Save the high and low order words of the OHA status reg
                uint32_t l_oha_reg_high =
                    l_core_data->oha.oha_ro_status_reg.words.high_order;
                uint32_t l_oha_reg_low =
                    l_core_data->oha.oha_ro_status_reg.words.low_order;

                // Handle each error case
                if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0
                     & CORE_DATA_EXPECTED_EMPATH_ERROR)
                    && (L_expected_emp_err_cnt < OCC_EMPATH_ERROR_THRESH))
                {
                    L_expected_emp_err_cnt++;
                    TRAC_IMP("Expected empath collection error occurred %d time(s)! Core = %d",
                             L_expected_emp_err_cnt,
                             l_bulk_core_data_ptr->current_core);
                    TRAC_IMP("OHA status register: 0x%4.4x%4.4x",
                             l_oha_reg_high, l_oha_reg_low);
                }

                if ((l_core_data->oha.oha_ro_status_reg.fields._reserved0
                     & CORE_DATA_UNEXPECTED_EMPATH_ERROR)
                    && (L_unexpected_emp_err_cnt < OCC_EMPATH_ERROR_THRESH))
                {
                    L_unexpected_emp_err_cnt++;
                    TRAC_ERR("Unexpected empath collection error occurred %d time(s)! Core = %d",
                             L_unexpected_emp_err_cnt,
                             l_bulk_core_data_ptr->current_core);
                    TRAC_ERR("OHA status register: 0x%4.4x%4.4x",
                             l_oha_reg_high, l_oha_reg_low);

                    // Create and commit an informational error the first
                    // time this occurs.
                    if (L_unexpected_emp_err_cnt == 1)
                    {
                        TRAC_IMP("Logging unexpected empath collection error 1 time only.");
                        /*
                         * @errortype
                         * @moduleid    PROC_TASK_CORE_DATA_MOD
                         * @reasoncode  INTERNAL_HW_FAILURE
                         * @userdata1   OHA status reg high
                         * @userdata2   OHA status reg low
                         * @userdata4   ERC_PROC_CORE_DATA_EMPATH_ERROR
                         * @devdesc     An unexpected error occurred while
                         *              collecting core empath data.
                         */
                        l_err = createErrl(
                            PROC_TASK_CORE_DATA_MOD,            //modId
                            INTERNAL_HW_FAILURE,                //reason code
                            ERC_PROC_CORE_DATA_EMPATH_ERROR,    //Extended reason code
                            ERRL_SEV_INFORMATIONAL,             //Severity
                            NULL,                               //Trace
                            DEFAULT_TRACE_SIZE,                 //Trace Size
                            l_oha_reg_high,                     //userdata1
                            l_oha_reg_low);                     //userdata2

                        commitErrl(&l_err);
                    }
                }
            }
        }
// RTL task that drives Centaur memory-controller control operations via a GPE
// SCOM job.  Tracks consecutive SCOM failures per Centaur and logs (once per
// Centaur) a predictive error with Centaur + processor callouts when the GPE
// job fails, unless the Centaur has a channel checkstop.
// NOTE(review): this chunk appears truncated - the remainder of the do/while
// body and the function close are not visible here.
void task_centaur_control( task_t * i_task )
{
    errlHndl_t l_err = NULL;    // Error handler
    int rc = 0;                 // Return code
    uint32_t l_cent;
    amec_centaur_t *l_cent_ptr = NULL;
    static uint8_t L_scom_timeout[MAX_NUM_CENTAURS] = {0}; //track # of consecutive failures
    static bool L_gpe_scheduled = FALSE;
    static uint8_t L_gpe_fail_logged = 0;   // one bit per Centaur: error already logged
    static bool L_gpe_idle_traced = FALSE;
    static bool L_gpe_had_1_tick = FALSE;   // GPE was still busy on the previous tick

    // Pointer to the task data structure
    centaur_control_task_t * l_centControlTask =
        (centaur_control_task_t *) i_task->data_ptr;

    // Pointer to parameter field for GPE request
    GpeScomParms * l_parms =
        (GpeScomParms *)(l_centControlTask->gpe_req.parameter);

    do
    {
        l_cent = l_centControlTask->curCentaur;
        l_cent_ptr = &g_amec->proc[0].memctl[l_cent].centaur;

        //First, check to see if the previous GPE request still running
        //A request is considered idle if it is not attached to any of the
        //asynchronous request queues
        if( !(async_request_is_idle(&l_centControlTask->gpe_req.request)) )
        {
            L_scom_timeout[l_cent]++;
            //This can happen due to variability in when the task runs
            // Only trace once the request has been busy for more than one tick.
            if(!L_gpe_idle_traced && L_gpe_had_1_tick)
            {
                TRAC_INFO("task_centaur_control: GPE is still running. cent[%d]", l_cent);
                l_centControlTask->traceThresholdFlags |= CENTAUR_CONTROL_GPE_STILL_RUNNING;
                L_gpe_idle_traced = TRUE;
            }
            L_gpe_had_1_tick = TRUE;
            break;
        }
        else
        {
            //Request is idle
            L_gpe_had_1_tick = FALSE;
            if(L_gpe_idle_traced)
            {
                TRAC_INFO("task_centaur_control: GPE completed. cent[%d]", l_cent);
                L_gpe_idle_traced = FALSE;
            }
        }

        //check scom status
        if(L_gpe_scheduled)
        {
            if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc)
            {
                // Only log this failure once per Centaur.
                if(!(L_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent)))
                {
                    // Check if the centaur has a channel checkstop. If it does,
                    // then do not log any errors. We also don't want to throttle
                    // a centaur that is in this condition.
                    if(!(cent_chan_checkstop(l_cent)))
                    {
                        L_gpe_fail_logged |= CENTAUR0_PRESENT_MASK >> l_cent;

                        TRAC_ERR("task_centaur_control: gpe_scom_centaur failed. l_cent=%d rc=%x, index=0x%08x",
                                 l_cent, l_parms->rc, l_parms->errorIndex);

                        /* @
                         * @errortype
                         * @moduleid    CENT_TASK_CONTROL_MOD
                         * @reasoncode  CENT_SCOM_ERROR
                         * @userdata1   rc - Return code of scom operation
                         * @userdata2   index of scom operation that failed
                         * @userdata4   OCC_NO_EXTENDED_RC
                         * @devdesc     OCC access to centaur failed
                         */
                        l_err = createErrl(
                            CENT_TASK_CONTROL_MOD,      // modId
                            CENT_SCOM_ERROR,            // reasoncode
                            OCC_NO_EXTENDED_RC,         // Extended reason code
                            ERRL_SEV_PREDICTIVE,        // Severity
                            NULL,                       // Trace Buf
                            DEFAULT_TRACE_SIZE,         // Trace Size
                            l_parms->rc,                // userdata1
                            l_parms->errorIndex         // userdata2
                            );

                        // Attach the GPE request FFDC for debug.
                        addUsrDtlsToErrl(l_err,                                     //io_err
                                         (uint8_t *) &(l_centControlTask->gpe_req.ffdc), //i_dataPtr,
                                         sizeof(PoreFfdc),                          //i_size
                                         ERRL_USR_DTL_STRUCT_VERSION_1,             //version
                                         ERRL_USR_DTL_BINARY_DATA);                 //type

                        //callout the centaur
                        addCalloutToErrl(l_err,
                                         ERRL_CALLOUT_TYPE_HUID,
                                         G_sysConfigData.centaur_huids[l_cent],
                                         ERRL_CALLOUT_PRIORITY_MED);

                        //callout the processor
                        addCalloutToErrl(l_err,
                                         ERRL_CALLOUT_TYPE_HUID,
                                         G_sysConfigData.proc_huid,
                                         ERRL_CALLOUT_PRIORITY_MED);

                        commitErrl(&l_err);
                    }
                }//if(l_gpe_fail_logged & (CENTAUR0_PRESENT_MASK >> l_cent))

                //Request failed. Keep count of failures and request a reset if we reach a
                //max retry count
                L_scom_timeout[l_cent]++;
                if(L_scom_timeout[l_cent] == CENTAUR_CONTROL_SCOM_TIMEOUT)
                {
                    break;
                }
            }//if(!async_request_completed(&l_centControlTask->gpe_req.request) || l_parms->rc)
            else
            {
                //request completed successfully. reset the timeout.
                L_scom_timeout[l_cent] = 0;
            }
        }//if(L_gpe_scheduled)
int _centaur_configuration_create(int i_bar, int i_slave, int i_setup) { CentaurConfiguration config; int i, designatedSync, diffInit; int64_t rc; /* Must be copied to global struct. */ mcfgpr_t mcfgpr; mcifir_t mcifir; mcsmode0_t mcsmode0; pba_slvctln_t slvctl; uint64_t diffMask, addrAccum, bar, mask, base; PoreFlex request; // Start by clearing the local structure and setting the error flag. memset(&config, 0, sizeof(config)); config.configRc = CENTAUR_NOT_CONFIGURED; designatedSync = -1; do { // Basic consistency checks if ((i_bar < 0) || (i_bar >= PBA_BARS) || (i_slave < 0) || (i_slave >= PBA_SLAVES)) { rc = CENTAUR_INVALID_ARGUMENT; break; } // Create the setups for the GPE procedures. The 'dataParms' are the // setup for accessing the Centaur sensor cache. The 'scomParms' are // the setup for accessing Centaur SCOMs. rc = gpe_pba_parms_create(&(config.dataParms), PBA_SLAVE_PORE_GPE, PBA_WRITE_TTYPE_CI_PR_W, PBA_WRITE_TTYPE_DC, PBA_READ_TTYPE_CL_RD_NC); if (rc) { rc = CENTAUR_DATA_SETUP_ERROR; break; } rc = gpe_pba_parms_create(&(config.scomParms), PBA_SLAVE_PORE_GPE, PBA_WRITE_TTYPE_CI_PR_W, PBA_WRITE_TTYPE_DC, PBA_READ_TTYPE_CI_PR_RD); if (rc) { rc = CENTAUR_SCOM_SETUP_ERROR; break; } // Go into each MCS on the chip, and for all enabled MCS get a couple // of SCOMs and check configuration items for correctness. If any of // the Centaur are configured, exactly one of the MCS must be // designated to receive the SYNC commands. // Note that the code uniformly treats SCOM failures of the MCFGPR // registers as an unconfigured Centaur. This works both for Murano, // which only defines the final 4 MCS, as well as for our VBU models // where some of the "valid" MCS are not in the simulation models. for (i = 0; i < PGP_NCENTAUR; i++) { // SW273928: New function added for FW820, when centaur has channel // checkstop, we consider centaur is not usable so treat it as // deconfigured. 
Note that the current implementation assumes when // centaur is dead, its mcs is also dead, which is wrong. However, // it only concerns when MCS happens to be the SYNC master because // the gpe procedure only tries to talk to centaurs regardless what // MCS status it knows about. In this particular case, // the procedure will turn on SYNC on a different MCS with // valid centaur. According to Eric Retter, it would be ok for // HW to have more MCS turned on as SYNC master as long as FW // only send SYNC command to one of them. rc = _getscom(MCS_ADDRESS(MCIFIR, i), &(mcifir.value), SCOM_TIMEOUT); if (rc) { rc = 0; config.baseAddress[i] = 0; continue; } if (mcifir.fields.channel_fail_signal_active) continue; rc = _getscom(MCS_ADDRESS(MCFGPR, i), &(mcfgpr.value), SCOM_TIMEOUT); if (rc) { rc = 0; config.baseAddress[i] = 0; continue; } if (!mcfgpr.fields.mcfgprq_valid) continue; rc = _getscom(MCS_ADDRESS(MCSMODE0, i), &(mcsmode0.value), SCOM_TIMEOUT); if (rc) { PRINTD("Unexpected rc = 0x%08x SCOMing MCSMODE0(%d)\n", (uint32_t)rc, i); rc = CENTAUR_MCSMODE0_SCOM_FAILURE; break; } // We require that the MCFGRP_19_IS_HO_BIT be set in the mode // register. We do not support the option of this bit not being // set, and all of our procedures will set bit 19 of the PowerBus // address to indicate that OCC is making the access. if (!mcsmode0.fields.mcfgrp_19_is_ho_bit) { PRINTD("MCSMODE0(%d).mcfgrp_19_is_ho_bit == 0\n", i); rc = CENTAUR_MCSMODE0_19_FAILURE; break; } // The 14-bit base-address is moved to begin at bit 14 in the // 64-bit PowerBus address. The low-order bit of this address (bit // 19 mentioned above which is bit 27 as an address bit) must be 0 // - otherwise there is confusion over who's controlling this // bit. 
config.baseAddress[i] = ((uint64_t)(mcfgpr.fields.mcfgprq_base_address)) << (64 - 14 - 14); if (config.baseAddress[i] & 0x0000001000000000ull) { PRINTD("Centaur base address %d has bit 27 set\n", i); rc = CENTAUR_ADDRESS_27_FAILURE; break; } // If this MCS is configured to be the designated SYNC unit, it // must be the only one. if (mcsmode0.fields.enable_centaur_sync) { if (designatedSync > 0) { PRINTD("Both MCS %d and %d are designated " "for Centaur Sync\n", designatedSync, i); rc = CENTAUR_MULTIPLE_DESIGNATED_SYNC; break; } else { designatedSync = i; } } // Add the Centaur to the configuration config.config |= (CHIP_CONFIG_MCS(i) | CHIP_CONFIG_CENTAUR(i)); } if (rc) break; // If Centaur are configured, make sure at least one of the MCS will // handle the SYNC. If so, convert its base address into an address // for issuing SYNC commands by setting bits 27 (OCC) 28 and 29 // (Sync), then insert this address into the extended address field of // a PBA slave control register image. gsc_scom_centaur() then merges // this extended address into the PBA slave control register (which // has been set up for Centaur SCOM) to do the SYNC. // In the override mode (i_setup > 1) we tag the first valid MCS // to recieve the sync if the firmware has not set it up correctly. 
if (config.config) { if (designatedSync < 0) { if (i_setup <= 1) { PRINTD("No MCS is designated for Centaur SYNC\n"); rc = CENTAUR_NO_DESIGNATED_SYNC; break; } else { designatedSync = cntlz32(left_justify_mcs_config(config.config)); rc = _getscom(MCS_ADDRESS(MCSMODE0, designatedSync), &(mcsmode0.value), SCOM_TIMEOUT); if (rc) { PRINTD("Unexpected rc = 0x%08x SCOMing MCSMODE0(%d)\n", (uint32_t)rc, designatedSync); rc = CENTAUR_MCSMODE0_SCOM_FAILURE; break; } mcsmode0.fields.enable_centaur_sync = 1; rc = _putscom(MCS_ADDRESS(MCSMODE0, designatedSync), mcsmode0.value, SCOM_TIMEOUT); if (rc) { PRINTD("Unexpected rc = 0x%08x SCOMing MCSMODE0(%d)\n", (uint32_t)rc, designatedSync); rc = CENTAUR_MCSMODE0_SCOM_FAILURE; break; } } } base = config.baseAddress[designatedSync] | 0x0000001c00000000ull; slvctl.value = 0; slvctl.fields.extaddr = (base & 0x000001fff8000000ull) >> 27; config.syncSlaveControl = slvctl.value; } // At this point we have one or more enabled MCS and they pass the // initial configuration sniff test. We can now implement the option // to configure the PBA BAR and BAR MASK correctly to allow access to // these Centaur. We do this by computing the minimum BAR mask that // covers all of the Centaur base addresses. This is done by // accumulating a difference mask of the base addresses and finding // the first set bit in the mask. // // Note that we do the configuration here on demand, but always do the // correctness checking as the next step. 
if (i_setup && (config.config != 0)) { diffInit = 0; diffMask = 0; /* GCC happiness */ addrAccum = 0; /* GCC happiness */ for (i = 0; i < PGP_NCENTAUR; i++) { if (config.baseAddress[i] != 0) { if (!diffInit) { diffInit = 1; diffMask = 0; addrAccum = config.baseAddress[i]; } else { diffMask |= (config.baseAddress[i] ^ addrAccum); addrAccum |= config.baseAddress[i]; } if (0) { // Debug printk("i:%d baseAddress: 0x%016llx " "diffMask: 0x%016llx, addrAccum: 0x%016llx\n", i, config.baseAddress[i], diffMask, addrAccum); } } } // The mask must cover all differences - and must also have at // least bit 27 set. The mask register contains only the mask. The // BAR is set to the accumulated address outside of the mask. The // BAR also contains a scope field which defaults to 0 (Nodal // Scope) for Centaur inband access. diffMask |= 0x0000001000000000ull; mask = ((1ull << (64 - cntlz64(diffMask))) - 1) & PBA_BARMSKN_MASK_MASK; rc = _putscom(PBA_BARMSKN(i_bar), mask, SCOM_TIMEOUT); if (rc) { PRINTD("Unexpected rc = 0x%08x SCOMing PBA_BARMSKN(%d)\n", (uint32_t)rc, i_bar); rc = CENTAUR_BARMSKN_PUTSCOM_FAILURE; break; } rc = _putscom(PBA_BARN(i_bar), addrAccum & ~mask, SCOM_TIMEOUT); if (rc) { PRINTD("Unexpected rc = 0x%08x SCOMing PBA_BARN(%d)\n", (uint32_t)rc, i_bar); rc = CENTAUR_BARN_PUTSCOM_FAILURE; break; } } // Do an independent check that every Centaur base address // can be generated by the combination of the current BAR and // BAR Mask, along with the initial requirement that the mask must // include at least bits 27:43. 
if (config.config != 0) { rc = _getscom(PBA_BARN(i_bar), &bar, SCOM_TIMEOUT); if (rc) { PRINTD("Unexpected rc = 0x%08x SCOMing PBA_BARN(%d)\n", (uint32_t)rc, i_bar); rc = CENTAUR_BARN_GETSCOM_FAILURE; break; } rc = _getscom(PBA_BARMSKN(i_bar), &mask, SCOM_TIMEOUT); if (rc) { PRINTD("Unexpected rc = 0x%08x SCOMing PBA_BARMSKN(%d)\n", (uint32_t)rc, i_bar); rc = CENTAUR_BARMSKN_GETSCOM_FAILURE; break; } bar = bar & PBA_BARN_ADDR_MASK; mask = mask & PBA_BARMSKN_MASK_MASK; if ((mask & 0x0000001ffff00000ull) != 0x0000001ffff00000ull) { PRINTD("PBA BAR mask (%d) does not cover bits 27:43\n", i_bar); rc = CENTAUR_MASK_ERROR; break; } for (i = 0; i < PGP_NCENTAUR; i++) { if (config.baseAddress[i] != 0) { if ((config.baseAddress[i] & ~mask) != (bar & ~mask)) { PRINTD("BAR/Mask (%d) error for MCS/Centaur %d\n" " base = 0x%016llx\n" " bar = 0x%016llx\n" " mask = 0x%016llx\n", i_bar, i, config.baseAddress[i], bar, mask); rc = CENTAUR_BAR_MASK_ERROR; break; } } } if (rc) break; } // At this point the structure is initialized well-enough that it can // be used by gpe_scom_centaur(). We run gpe_scom_centaur() to collect // the CFAM ids of the chips. Prior to this we copy our local copy // into the global read-only data structure. (Note that GPE can DMA // under the OCC TLB memory protection.) In order for // gpe_scom_centaur() to run the global configuration must be valid // (configRc == 0) - so we provisionally mark it valid (and will // invalidate it later if errors occur here). // Note however that if no Centaur are present then we're already // done. // It's assumed that this procedure is being run before threads have // started, therefore we must poll for completion of the GPE program. // Assuming no contention for GPE1 this procedure should take a few // microseconds at most to complete. 
if (0) { // Debug for Simics - only enable MCS 5 config.baseAddress[0] = config.baseAddress[1] = config.baseAddress[2] = config.baseAddress[3] = config.baseAddress[4] = config.baseAddress[6] = config.baseAddress[7] = 0; } config.configRc = 0; memcpy_real(&G_centaurConfiguration, &config, sizeof(config)); if (config.config == 0) break; S_scomList.scom = CENTAUR_DEVICE_ID; S_scomList.commandType = GPE_SCOM_READ_VECTOR; S_scomList.pData = G_centaurConfiguration.deviceId; S_parms.scomList = CAST_POINTER(uint64_t, &S_scomList); S_parms.entries = 1; S_parms.options = 0; pore_flex_create(&request, &G_pore_gpe1_queue, gpe_scom_centaur, (uint32_t)(&S_parms), SSX_MILLISECONDS(10), /* Timeout */ 0, 0, 0); rc = pore_flex_schedule(&request); if (rc) break; while (!async_request_is_idle((AsyncRequest*)(&request))); if (!async_request_completed((AsyncRequest*)(&request)) || (S_parms.rc != 0)) { PRINTD("gpe_scom_centaur() for CENTAUR_DEVICE_ID failed:\n" " Async state = 0x%02x\n" " gpe_scom_centaur() rc = %u\n" " gpe_scom_centaur() errorIndex = %d\n", ((AsyncRequest*)(&request))->state, S_parms.rc, S_parms.errorIndex); rc = CENTAUR_READ_TPC_ID_FAILURE; } if (0) { // Debug slvctl.value = G_gsc_lastSlaveControl; PRINTD("centaur_configuration_create:Debug\n" " Last SCOM (PowerBus) address = 0x%016llx\n" " Last Slave Control = 0x%016llx\n" " Extended Address (positioned) = 0x%016llx\n" " Last OCI Address = 0x%016llx\n", G_gsc_lastScomAddress, G_gsc_lastSlaveControl, (unsigned long long)(slvctl.fields.extaddr) << (64 - 23 - 14), G_gsc_lastOciAddress); } } while (0); // Copy the final RC into the global structure and done. memcpy_real(&(G_centaurConfiguration.configRc), &rc, sizeof(rc)); return rc; }
// Schedule a GPE request for the specified DIMM state bool schedule_dimm_req(uint8_t i_state) { bool l_scheduled = false; bool scheduleRequest = true; DIMM_DBG("dimm_sm called with state 0x%02X (tick=%d)", i_state, DIMM_TICK); if (!async_request_is_idle(&G_dimm_sm_request.request)) { INTR_TRAC_ERR("dimm_sm: request is not idle."); } else { switch(i_state) { // Init case DIMM_STATE_INIT: break; // Read DIMM temp case DIMM_STATE_WRITE_MODE: case DIMM_STATE_WRITE_ADDR: case DIMM_STATE_INITIATE_READ: case DIMM_STATE_READ_TEMP: break; // I2C reset case DIMM_STATE_RESET_MASTER: case DIMM_STATE_RESET_SLAVE_P0: case DIMM_STATE_RESET_SLAVE_P0_COMPLETE: case DIMM_STATE_RESET_SLAVE_P1: case DIMM_STATE_RESET_SLAVE_P1_COMPLETE: break; default: INTR_TRAC_ERR("dimm_sm: Invalid state (0x%02X)", i_state); errlHndl_t err = NULL; /* * @errortype * @moduleid DIMM_MID_DIMM_SM * @reasoncode DIMM_INVALID_STATE * @userdata1 DIMM state * @userdata2 0 * @devdesc Invalid DIMM I2C state requested */ err = createErrl(DIMM_MID_DIMM_SM, DIMM_INVALID_STATE, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, NULL, DEFAULT_TRACE_SIZE, i_state, 0); // Request reset since this should never happen. 
REQUEST_RESET(err); scheduleRequest = false; break; } if (scheduleRequest) { // Clear errors and init common arguments for GPE G_dimm_sm_args.error.error = 0; G_dimm_sm_args.state = i_state; DIMM_DBG("dimm_sm: Scheduling GPE1 DIMM I2C state 0x%02X (tick %d)", i_state, DIMM_TICK); int l_rc = gpe_request_schedule(&G_dimm_sm_request); if (0 == l_rc) { l_scheduled = true; } else { errlHndl_t l_err = NULL; INTR_TRAC_ERR("dimm_sm: schedule failed w/rc=0x%08X (%d us)", l_rc, (int) ((ssx_timebase_get())/(SSX_TIMEBASE_FREQUENCY_HZ/1000000))); /* * @errortype * @moduleid DIMM_MID_DIMM_SM * @reasoncode SSX_GENERIC_FAILURE * @userdata1 GPE shedule returned rc code * @userdata2 state * @devdesc dimm_sm schedule failed */ l_err = createErrl(DIMM_MID_DIMM_SM, SSX_GENERIC_FAILURE, ERC_DIMM_SCHEDULE_FAILURE, ERRL_SEV_PREDICTIVE, NULL, DEFAULT_TRACE_SIZE, l_rc, i_state); // Request reset since this should never happen. REQUEST_RESET(l_err); } } } return l_scheduled; } // end schedule_dimm_req()
// Function Specification
//
// Name: task_dcom_tx_slv_outbox
//
// Description: Copy slave outboxes from SRAM to main memory
//              so slave can send data to master
//
// Task Flags:  RTL_FLAG_NONMSTR, RTL_FLAG_MSTR, RTL_FLAG_OBS, RTL_FLAG_ACTIVE,
//              RTL_FLAG_NOAPSS, RTL_FLAG_RUN, RTL_FLAG_MSTR_READY
//
// End Function Specification
// NOTE(review): this chunk appears truncated - the request create/schedule
// code, the do/while close, and the function close are not visible here.
void task_dcom_tx_slv_outbox( task_t *i_self)
{
    static bool l_error = FALSE;
    uint32_t l_orc = OCC_SUCCESS_REASON_CODE;
    uint32_t l_orc_ext = OCC_NO_EXTENDED_RC;
    // Use a static local bool to track whether the BCE request used
    // here has ever been successfully created at least once
    static bool L_bce_slv_outbox_tx_request_created_once = FALSE;

    DCOM_DBG("3. TX Slave Outboxes\n");

    do
    {
        // Build/setup outbox
        uint32_t l_addr_in_mem = dcom_build_slv_outbox();
        uint32_t l_ssxrc = 0;

        // See dcomMasterRx.c/task_dcom_rx_slv_outboxes for details on the
        // checking done here before creating and scheduling the request.
        bool l_proceed_with_request_and_schedule = FALSE;
        int l_req_idle = async_request_is_idle(&(G_slv_outbox_tx_pba_request.request));
        int l_req_complete = async_request_completed(&(G_slv_outbox_tx_pba_request.request));

        if (!L_bce_slv_outbox_tx_request_created_once)
        {
            // Do this case first, all other cases assume that this is
            // true!
            // This is the first time we have created a request so
            // always proceed with request create and schedule
            l_proceed_with_request_and_schedule = TRUE;
        }
        else if (l_req_idle && l_req_complete)
        {
            // Most likely case first. The request was created
            // and scheduled and has completed without error. Proceed.
            // Proceed with request create and schedule.
            l_proceed_with_request_and_schedule = TRUE;
        }
        else if (l_req_idle && !l_req_complete)
        {
            // There was an error on the schedule request or the request
            // was scheduled but was canceled, killed or errored out.
            // Proceed with request create and schedule.
            l_proceed_with_request_and_schedule = TRUE;

            // Trace important information from the request
            TRAC_INFO("BCE slv outbox tx request idle but not complete, \
callback_rc=%d options=0x%x state=0x%x abort_state=0x%x \
completion_state=0x%x",
                      G_slv_outbox_tx_pba_request.request.callback_rc,
                      G_slv_outbox_tx_pba_request.request.options,
                      G_slv_outbox_tx_pba_request.request.state,
                      G_slv_outbox_tx_pba_request.request.abort_state,
                      G_slv_outbox_tx_pba_request.request.completion_state);
            TRAC_INFO("Proceeding with BCE slv outbox tx request and schedule");
        }
        else if (!l_req_idle && !l_req_complete)
        {
            // The request was created and scheduled but is still in
            // progress or still enqueued OR there was some error
            // creating the request so it was never scheduled. The latter
            // case is unlikely and will generate an error message when
            // it occurs. It will also have to happen after the request
            // was created at least once or we'll never get here. If the
            // request does fail though before the state parms in the
            // request are reset (like a bad parameter error), then this
            // represents a hang condition that we can't recover from.
            // DO NOT proceed with request create and schedule.
            l_proceed_with_request_and_schedule = FALSE;

            // Trace important information from the request
            TRAC_INFO("BCE slv outbox tx request not idle and not complete, \
callback_rc=%d options=0x%x state=0x%x abort_state=0x%x \
completion_state=0x%x",
                      G_slv_outbox_tx_pba_request.request.callback_rc,
                      G_slv_outbox_tx_pba_request.request.options,
                      G_slv_outbox_tx_pba_request.request.state,
                      G_slv_outbox_tx_pba_request.request.abort_state,
                      G_slv_outbox_tx_pba_request.request.completion_state);
            TRAC_INFO("NOT proceeding with BCE slv outbox tx request and schedule");
        }
// Updates the OCC firmware-timing sensors (RTL tick duration, AMEC interrupt
// duration, per-state AMEC duration, GPE0/GPE1 tick durations) from the data
// captured during the previous tick, then re-schedules the two GPE timing
// jobs so their callbacks can record worst-case timings for the next tick.
// NOTE(review): this chunk appears truncated - the remainder of the else-if
// FFDC path and the function close are not visible here.
void amec_update_fw_sensors(void)
{
    errlHndl_t l_err = NULL;
    int rc = 0;
    int rc2 = 0;
    static bool l_first_call = TRUE;
    bool l_gpe0_idle, l_gpe1_idle;
    static int L_consec_trace_count = 0;

    // ------------------------------------------------------
    // Update OCC Firmware Sensors from last tick
    // ------------------------------------------------------
    int l_last_state = G_fw_timing.amess_state;

    // RTLtickdur = duration of last tick's RTL ISR (max = 250us)
    sensor_update( AMECSENSOR_PTR(RTLtickdur), G_fw_timing.rtl_dur);

    // AMEintdur = duration of last tick's AMEC portion of RTL ISR
    sensor_update( AMECSENSOR_PTR(AMEintdur), G_fw_timing.ameint_dur);

    // AMESSdurX = duration of last tick's AMEC state
    if(l_last_state >= NUM_AMEC_SMH_STATES)
    {
        // Sanity check. Trace this out, even though it should never happen.
        TRAC_INFO("AMEC State Invalid, Sensor Not Updated");
    }
    else
    {
        // AMESSdurX = duration of last tick's AMEC state
        sensor_update( AMECSENSOR_ARRAY_PTR(AMESSdur0, l_last_state), G_fw_timing.amess_dur);
    }

    // ------------------------------------------------------
    // Kick off GPE programs to track WorstCase time in GPE
    // and update the sensors.
    // ------------------------------------------------------
    if( (NULL != G_fw_timing.gpe0_timing_request)
        && (NULL != G_fw_timing.gpe1_timing_request) )
    {
        //Check if both GPE engines were able to complete the last GPE job on
        //the queue within 1 tick.
        l_gpe0_idle = async_request_is_idle(&G_fw_timing.gpe0_timing_request->request);
        l_gpe1_idle = async_request_is_idle(&G_fw_timing.gpe1_timing_request->request);
        if(l_gpe0_idle && l_gpe1_idle)
        {
            //reset the consecutive trace count
            L_consec_trace_count = 0;

            //Both GPE engines finished on time. Now check if they were
            //successful too.
            if( async_request_completed(&(G_fw_timing.gpe0_timing_request->request))
                && async_request_completed(&(G_fw_timing.gpe1_timing_request->request)) )
            {
                // GPEtickdur0 = duration of last tick's PORE-GPE0 duration
                sensor_update( AMECSENSOR_PTR(GPEtickdur0), G_fw_timing.gpe_dur[0]);
                // GPEtickdur1 = duration of last tick's PORE-GPE1 duration
                sensor_update( AMECSENSOR_PTR(GPEtickdur1), G_fw_timing.gpe_dur[1]);
            }
            else
            {
                //This case is expected on the first call of the function.
                //After that, this should not happen.
                if(!l_first_call)
                {
                    //Note: FFDC for this case is gathered by each task
                    //responsible for a GPE job.
                    TRAC_INFO("GPE task idle but GPE task did not complete");
                }
                l_first_call = FALSE;
            }

            // Update Time used to measure GPE duration.
            G_fw_timing.rtl_start_gpe = G_fw_timing.rtl_start;

            // Schedule the GPE Routines that will run and update the worst
            // case timings (via callback) after they complete. These GPE
            // routines are the last GPE routines added to the queue
            // during the RTL tick.
            rc  = pore_flex_schedule(G_fw_timing.gpe0_timing_request);
            rc2 = pore_flex_schedule(G_fw_timing.gpe1_timing_request);

            if(rc || rc2)
            {
                /* @
                 * @errortype
                 * @moduleid    AMEC_UPDATE_FW_SENSORS
                 * @reasoncode  SSX_GENERIC_FAILURE
                 * @userdata1   return code - gpe0
                 * @userdata2   return code - gpe1
                 * @userdata4   OCC_NO_EXTENDED_RC
                 * @devdesc     Failure to schedule PORE-GPE poreFlex object for FW timing
                 *              analysis.
                 */
                l_err = createErrl(
                    AMEC_UPDATE_FW_SENSORS,     //modId
                    SSX_GENERIC_FAILURE,        //reasoncode
                    OCC_NO_EXTENDED_RC,         //Extended reason code
                    ERRL_SEV_INFORMATIONAL,     //Severity
                    NULL,                       //Trace Buf
                    DEFAULT_TRACE_SIZE,         //Trace Size
                    rc,                         //userdata1
                    rc2);                       //userdata2

                // commit error log
                commitErrl( &l_err );
            }
        }
        else if(L_consec_trace_count < MAX_CONSEC_TRACE)
        {
            uint64_t l_dbg1;

            // Reset will eventually be requested due to not having power measurement
            // data after X ticks, but add some additional FFDC to the trace that
            // will tell us what GPE job is currently executing.
            if(!l_gpe0_idle)
            {
                l_dbg1 = in64(PORE_GPE0_DBG1);
                TRAC_ERR("GPE0 programs did not complete within one tick. DBG1[0x%08x%08x]",
                         l_dbg1 >> 32, l_dbg1 & 0x00000000ffffffffull);
            }