// Called after a failure to read a DIMM temperature. The error will // be counted and if threshold is reached, and error will be created with // the DIMM as a callout and then set flag to trigger I2C reset void mark_dimm_failed() { const uint8_t port = G_dimm_sm_args.i2cPort; const uint8_t dimm = G_dimm_sm_args.dimm; INTR_TRAC_ERR("mark_dimm_failed: DIMM%04X failed in state/rc/count=0x%06X " "(ffdc 0x%08X%08X, completion_state 0x%02X)", DIMM_AND_PORT, (G_dimm_sm_args.state << 16) | (G_dimm_sm_args.error.rc << 8) | G_dimm[port][dimm].errorCount, WORD_HIGH(G_dimm_sm_args.error.ffdc), WORD_LOW(G_dimm_sm_args.error.ffdc), G_dimm_sm_request.request.completion_state); if (++G_dimm[port][dimm].errorCount > MAX_CONSECUTIVE_DIMM_RESETS) { // Disable collection on this DIMM, collect FFDC and log error G_dimm[port][dimm].disabled = true; INTR_TRAC_ERR("mark_dimm_failed: disabling DIMM%04X due to %d consecutive errors (state=%d)", DIMM_AND_PORT, G_dimm[port][dimm].errorCount, G_dimm_sm_args.state); errlHndl_t l_err = NULL; /* * @errortype * @moduleid DIMM_MID_MARK_DIMM_FAILED * @reasoncode DIMM_GPE_FAILURE * @userdata1 GPE returned rc code * @userdata4 ERC_DIMM_COMPLETE_FAILURE * @devdesc Failure writing dimm i2c mode register */ l_err = createErrl(DIMM_MID_MARK_DIMM_FAILED, DIMM_GPE_FAILURE, ERC_DIMM_COMPLETE_FAILURE, ERRL_SEV_INFORMATIONAL, NULL, DEFAULT_TRACE_SIZE, G_dimm_sm_args.error.rc, 0); addUsrDtlsToErrl(l_err, (uint8_t*)&G_dimm_sm_request.ffdc, sizeof(G_dimm_sm_request.ffdc), ERRL_STRUCT_VERSION_1, ERRL_USR_DTL_BINARY_DATA); addCalloutToErrl(l_err, ERRL_CALLOUT_TYPE_HUID, G_sysConfigData.dimm_huids[port][dimm], ERRL_CALLOUT_PRIORITY_HIGH); commitErrl(&l_err); } // Reset DIMM I2C engine G_dimm_i2c_reset_required = true; G_dimm_i2c_reset_cause = port<<24 | dimm<<16 | (G_dimm_sm_args.error.rc & 0xFFFF); G_dimm_state = DIMM_STATE_RESET_MASTER; } // end mark_dimm_failed()
// Release the OCC lock indefinitely // This should be called when OCC goes into safe mode or will be reset // to allow the host to use the specified I2C engines. // If no engine is specified, locks for all I2C engines will be released void occ_i2c_lock_release(const uint8_t i_engine) { TRAC_INFO("occ_i2c_lock_release(engine %d) called", i_engine); if ((PIB_I2C_ENGINE_ALL == i_engine) || (PIB_I2C_ENGINE_E == i_engine) || (PIB_I2C_ENGINE_D == i_engine) || (PIB_I2C_ENGINE_C == i_engine)) { if ((PIB_I2C_ENGINE_E == i_engine) || (PIB_I2C_ENGINE_ALL == i_engine)) { update_i2c_lock(LOCK_RELEASE, PIB_I2C_ENGINE_E); } if ((PIB_I2C_ENGINE_D == i_engine) || (PIB_I2C_ENGINE_ALL == i_engine)) { update_i2c_lock(LOCK_RELEASE, PIB_I2C_ENGINE_D); } if ((PIB_I2C_ENGINE_C == i_engine) || (PIB_I2C_ENGINE_ALL == i_engine)) { update_i2c_lock(LOCK_RELEASE, PIB_I2C_ENGINE_C); } } else { INTR_TRAC_ERR("occ_i2c_lock_release: Invalid engine specified: 0x%02X", i_engine); } } // end occ_i2c_lock_release()
// Function Specification
//
// Name:  task_dimm_sm
//
// Description: DIMM State Machine - Called every other tick to collect all of
//              the DIMM temperatures.
//
//              Each call performs at most these steps, in order:
//                1. If an I2C reset is pending, advance the reset state machine
//                   (or give up and disable memory monitoring if a failure is
//                   detected during the reset).
//                2. If the OCC does not own the I2C lock, try to get it back
//                   (only on DIMM ticks 0 and 8).
//                3. If the OCC owns the lock, process the result of the
//                   previously scheduled GPE request and then schedule the
//                   request for the current state.
//
//              i_self is not referenced by this function.
//
// Task Flags: RTL_FLAG_ACTIVE
//
// End Function Specification
void task_dimm_sm(struct task *i_self)
{
    // DIMM (index/port) that will be collected next; persists across calls
    static uint8_t L_dimmIndex = 0x00;
    static uint8_t L_dimmPort  = 0x00;
    // Number of calls spent in the current state without an I2C interrupt;
    // only cleared when the state changes (see end of function)
    static uint8_t L_notReadyCount = 0;
    // Note: defined inside the function, but as a macro it is visible to the
    // rest of the file from this point on
#define MAX_READ_ATTEMPT 3
    // Failed attempts for the current initiate-read/read sequence
    static uint8_t L_readAttempt = 0;
    // True once a READ_TEMP request has been scheduled and its result is pending
    static bool L_readIssued = false;
    const uint8_t engine = G_sysConfigData.dimm_i2c_engine;
    // Cached lock ownership, refreshed via check_and_update_i2c_lock()
    static bool L_occ_owns_lock = true;

    if (G_mem_monitoring_allowed)
    {
#ifdef DEBUG_LOCK_TESTING
        // TODO: remove testing code once SIMICS_FLAG_ISSUE removed
        SIMULATE_HOST();
#endif

        // First handle any outstanding I2C reset
        if (G_dimm_i2c_reset_required)
        {
            // The failure check is skipped while still in RESET_MASTER
            // (short-circuit: check_for_i2c_failure() is not called in that state)
            if ((G_dimm_state != DIMM_STATE_RESET_MASTER) && (check_for_i2c_failure()))
            {
                // I2C failure occurred during a reset...
                INTR_TRAC_ERR("task_dimm_sm: Failure during I2C reset - memory monitoring disabled");
                // release I2C lock to the host for this engine and stop monitoring
                occ_i2c_lock_release(G_dimm_sm_args.i2cEngine);
                L_occ_owns_lock = false;
                G_mem_monitoring_allowed = false;
                // TODO: What else do we need to do? go to Safe State?
            }
            else
            {
                if (G_dimm_state == DIMM_STATE_INIT)
                {
                    // Reset has completed successfully
                    // (dimm_reset_sm() returns DIMM_STATE_INIT after the last reset step)
                    TRAC_INFO("task_dimm_sm: I2C reset completed");
                    G_dimm_i2c_reset_required = false;
                    // Check if host needs I2C lock
                    L_occ_owns_lock = check_and_update_i2c_lock(engine);
                }
                else
                {
                    // Reset still in progress
                    G_dimm_state = dimm_reset_sm();
                }
            }
        }

        // Re-tested (not an else) because the block above may have just cleared the flag
        if (G_dimm_i2c_reset_required == false)
        {
            if ((L_occ_owns_lock == false) && ((DIMM_TICK == 0) || (DIMM_TICK == 8)))
            {
                // Check if host gave up the I2C lock
                L_occ_owns_lock = check_and_update_i2c_lock(engine);
                if (L_occ_owns_lock)
                {
                    // Start over at the INIT state after receiving the lock
                    G_dimm_state = DIMM_STATE_INIT;
                }
            }

            if (L_occ_owns_lock)
            {
                // Check for failure on prior operation
                if (check_for_i2c_failure())
                {
                    // If there was a failure, continue to the next DIMM (after I2c reset)
                    use_next_dimm(&L_dimmPort, &L_dimmIndex);
                }

                // State to move to at the end of this call; defaults to "no change"
                uint8_t nextState = G_dimm_state;

                if (G_dimm_state == DIMM_STATE_INIT)
                {
                    // Setup I2C Interrupt Mask Register
                    DIMM_DBG("DIMM_STATE_INIT: (I2C Engine 0x%02X, Memory Type 0x%02X)",
                             engine, G_sysConfigData.mem_type);
                    G_dimm_sm_args.i2cEngine = engine;
                    if (schedule_dimm_req(DIMM_STATE_INIT))
                    {
                        nextState = DIMM_STATE_WRITE_MODE;
                    }
                }
                else
                {
                    bool intTriggered = check_for_i2c_interrupt(engine);
                    if (intTriggered == false)
                    {
                        // Interrupt not generated, I2C operation may not have completed.
                        // After MAX_TICK_COUNT_WAIT, attempt operation anyway.
                        ++L_notReadyCount;
                    }

                    // --- Part 1: process the result of the previously scheduled request ---

                    // Check if prior command completed (or timed out waiting for it)
                    if (intTriggered || (L_notReadyCount > MAX_TICK_COUNT_WAIT))
                    {
                        if (ASYNC_REQUEST_STATE_COMPLETE == G_dimm_sm_request.request.completion_state)
                        {
                            // IPC request completed, now check return code
                            if (GPE_RC_SUCCESS == G_dimm_sm_args.error.rc)
                            {
                                // last request completed without error
                                // (G_dimm_sm_args.state is the state that was last scheduled,
                                //  which is not necessarily the current G_dimm_state)
                                switch (G_dimm_sm_args.state)
                                {
                                    case DIMM_STATE_INIT:
                                        // Save max I2C ports
                                        if (G_maxDimmPorts != G_dimm_sm_args.maxPorts)
                                        {
                                            G_maxDimmPorts = G_dimm_sm_args.maxPorts;
                                            DIMM_DBG("task_dimm_sm: updating DIMM Max I2C Ports to %d", G_maxDimmPorts);
                                        }
                                        break;

                                    case DIMM_STATE_READ_TEMP:
                                        // L_readIssued ensures a given read result is consumed only once
                                        if (L_readIssued)
                                        {
                                            const uint8_t port = G_dimm_sm_args.i2cPort;
                                            const uint8_t dimm = G_dimm_sm_args.dimm;

                                            // Last DIMM read completed, update sensor and clear error count
                                            DIMM_DBG("task_dimm_sm: Successfully read DIMM%04X temperature: %dC, tick %d",
                                                     DIMM_AND_PORT, G_dimm_sm_args.temp, DIMM_TICK);
                                            g_amec->proc[0].memctl[port].centaur.dimm_temps[dimm].cur_temp = G_dimm_sm_args.temp;
                                            // Timestamp of the reading: timebase divided by ticks-per-microsecond
                                            G_dimm[port][dimm].lastReading = ((ssx_timebase_get())/(SSX_TIMEBASE_FREQUENCY_HZ/1000000));
                                            G_dimm[port][dimm].errorCount = 0;

                                            // Move on to next DIMM
                                            use_next_dimm(&L_dimmPort, &L_dimmIndex);
                                            L_readIssued = false;

                                            // Check if host needs the I2C lock
                                            // (this may clear L_occ_owns_lock, which is why it is
                                            //  tested again before scheduling anything below)
                                            L_occ_owns_lock = check_and_update_i2c_lock(engine);
                                        }
                                        break;

                                    default:
                                        // Nothing to do
                                        break;
                                }
                            }
                            else
                            {
                                // last request did not return success
                                // Only the initiate-read and read steps are retried/failed here;
                                // a bad rc from any other state is ignored by this switch.
                                switch (G_dimm_sm_args.state)
                                {
                                    case DIMM_STATE_INITIATE_READ:
                                        if (++L_readAttempt < MAX_READ_ATTEMPT)
                                        {
                                            // The initiate_read didnt complete, retry
                                            DIMM_DBG("task_dimm_sm: initiate read didn't start (%d attempts)", L_readAttempt);
                                            // Force the read again
                                            // (G_dimm_state is overwritten directly so the dispatch
                                            //  below can schedule the same operation again)
                                            G_dimm_state = DIMM_STATE_INITIATE_READ;
                                            nextState = G_dimm_state;
                                        }
                                        else
                                        {
                                            INTR_TRAC_ERR("task_dimm_sm: initiate read didn't start after %d attempts... forcing reset", L_readAttempt);
                                            // Sets G_dimm_i2c_reset_required and moves G_dimm_state
                                            // to DIMM_STATE_RESET_MASTER
                                            mark_dimm_failed();
                                        }
                                        break;

                                    case DIMM_STATE_READ_TEMP:
                                        if (L_readIssued)
                                        {
                                            if (++L_readAttempt < MAX_READ_ATTEMPT)
                                            {
                                                DIMM_DBG("task_dimm_sm: read didn't complete (%d attempts)", L_readAttempt);
                                                // Force the read again
                                                G_dimm_state = DIMM_STATE_READ_TEMP;
                                                nextState = G_dimm_state;
                                            }
                                            else
                                            {
                                                INTR_TRAC_ERR("task_dimm_sm: read did not complete after %d attempts... forcing reset", L_readAttempt);
                                                // Sets G_dimm_i2c_reset_required and moves G_dimm_state
                                                // to DIMM_STATE_RESET_MASTER
                                                mark_dimm_failed();
                                            }
                                        }
                                        break;

                                    default:
                                        // Nothing to do
                                        break;
                                }
                            }
                        }
                    }

                    // --- Part 2: schedule the operation for the current state ---

                    // Ownership is tested again because the READ_TEMP success path
                    // above may have handed the lock to the host
                    if (L_occ_owns_lock)
                    {
                        if (false == G_dimm_i2c_reset_required)
                        {
                            // Handle new DIMM state
                            switch (G_dimm_state)
                            {
                                case DIMM_STATE_WRITE_MODE:
                                    // Only start a DIMM read on tick 0 or 8
                                    if ((DIMM_TICK == 0) || (DIMM_TICK == 8))
                                    {
                                        // If DIMM has huid/sensor then it should be present
                                        if ((0 != G_sysConfigData.dimm_huids[L_dimmPort][L_dimmIndex]) &&
                                            (G_dimm[L_dimmPort][L_dimmIndex].disabled == false))
                                        {
                                            G_dimm_sm_args.i2cPort = L_dimmPort;
                                            G_dimm_sm_args.dimm = L_dimmIndex;
                                            DIMM_DBG("task_dimm_sm: Starting collection for DIMM%04X at tick %d",
                                                     DIMM_AND_PORT, DIMM_TICK);
                                            if (schedule_dimm_req(DIMM_STATE_WRITE_MODE))
                                            {
                                                nextState = DIMM_STATE_WRITE_ADDR;
                                            }
                                        }
                                        else
                                        {
                                            // Skip current DIMM and move on to next one
                                            use_next_dimm(&L_dimmPort, &L_dimmIndex);
                                        }
                                    }
                                    break;

                                case DIMM_STATE_WRITE_ADDR:
                                    // Wait for the previous operation's interrupt (or the timeout)
                                    if (intTriggered || (L_notReadyCount > MAX_TICK_COUNT_WAIT))
                                    {
                                        G_dimm_sm_args.dimm = L_dimmIndex;
                                        G_dimm_sm_args.i2cAddr = get_dimm_addr(L_dimmIndex);
                                        if (schedule_dimm_req(DIMM_STATE_WRITE_ADDR))
                                        {
                                            nextState = DIMM_STATE_INITIATE_READ;
                                            // Start of a new read sequence: reset retry bookkeeping
                                            L_readAttempt = 0;
                                            L_readIssued = false;
                                        }
                                    }
                                    break;

                                case DIMM_STATE_INITIATE_READ:
                                    if (intTriggered || (L_notReadyCount > MAX_TICK_COUNT_WAIT))
                                    {
                                        G_dimm_sm_args.dimm = L_dimmIndex;
                                        if (schedule_dimm_req(DIMM_STATE_INITIATE_READ))
                                        {
                                            nextState = DIMM_STATE_READ_TEMP;
                                        }
                                    }
                                    break;

                                case DIMM_STATE_READ_TEMP:
                                    if (intTriggered || (L_notReadyCount > MAX_TICK_COUNT_WAIT))
                                    {
                                        if (schedule_dimm_req(DIMM_STATE_READ_TEMP))
                                        {
                                            // The result of this read is picked up by Part 1 on a
                                            // later call (state is already back at WRITE_MODE then)
                                            L_readIssued = true;
                                            nextState = DIMM_STATE_WRITE_MODE;
                                        }
                                    }
                                    break;

                                default:
                                    INTR_TRAC_ERR("task_dimm_sm: INVALID STATE: 0x%02X", G_dimm_state);
                                    break;
                            }
                        }
                        else
                        {
                            // Previous op triggered reset
                            nextState = dimm_reset_sm();
                        }
                    }
                    else
                    {
                        // OCC no longer holds the i2c lock (no DIMM state change required)
                        nextState = G_dimm_state;
                    }
                }

                // Commit the state change (if any) and restart the not-ready counter
                if (nextState != G_dimm_state)
                {
                    DIMM_DBG("task_dimm_sm: Updating state to 0x%02X (DIMM%04X) end of tick %d", nextState, (L_dimmPort<<8)|L_dimmIndex, DIMM_TICK);
                    G_dimm_state = nextState;
                    L_notReadyCount = 0;
                }
            }
        }
    }

} // end task_dimm_sm()
// Handle the DIMM reset states uint8_t dimm_reset_sm() { uint8_t nextState = G_dimm_state; switch (G_dimm_state) { case DIMM_STATE_RESET_MASTER: if (DIMM_TICK == 0) { G_dimm_sm_args.i2cEngine = G_sysConfigData.dimm_i2c_engine; if (schedule_dimm_req(DIMM_STATE_RESET_MASTER)) { nextState = DIMM_STATE_RESET_SLAVE_P0; } } // else wait for tick 0 break; case DIMM_STATE_RESET_SLAVE_P0: G_dimm_sm_args.i2cPort = 0; if (schedule_dimm_req(DIMM_STATE_RESET_SLAVE_P0)) { nextState = DIMM_STATE_RESET_SLAVE_P0_WAIT; } break; case DIMM_STATE_RESET_SLAVE_P0_WAIT: // Delay to allow reset to complete DIMM_DBG("dimm_reset_sm: waiting during slave port 0 reset"); nextState = DIMM_STATE_RESET_SLAVE_P0_COMPLETE; break; case DIMM_STATE_RESET_SLAVE_P0_COMPLETE: if (schedule_dimm_req(DIMM_STATE_RESET_SLAVE_P0_COMPLETE)) { if (G_maxDimmPorts > 1) { nextState = DIMM_STATE_RESET_SLAVE_P1; } else { // If there is only one port, skip slave port 1 nextState = DIMM_STATE_INIT; DIMM_DBG("dimm_reset_sm: I2C reset completed (1 port)"); } } break; case DIMM_STATE_RESET_SLAVE_P1: G_dimm_sm_args.i2cPort = 1; if (schedule_dimm_req(DIMM_STATE_RESET_SLAVE_P1)) { nextState = DIMM_STATE_RESET_SLAVE_P1_WAIT; } break; case DIMM_STATE_RESET_SLAVE_P1_WAIT: // Delay to allow reset to complete nextState = DIMM_STATE_RESET_SLAVE_P1_COMPLETE; break; case DIMM_STATE_RESET_SLAVE_P1_COMPLETE: if (schedule_dimm_req(DIMM_STATE_RESET_SLAVE_P1_COMPLETE)) { nextState = DIMM_STATE_INIT; DIMM_DBG("dimm_reset_sm: I2C reset completed"); } break; default: INTR_TRAC_ERR("dimm_reset_sm: INVALID STATE: 0x%02X when reset is required", G_dimm_state); nextState = DIMM_STATE_RESET_MASTER; break; } return nextState; } // end dimm_reset_sm()
// Schedule a GPE request for the specified DIMM state bool schedule_dimm_req(uint8_t i_state) { bool l_scheduled = false; bool scheduleRequest = true; DIMM_DBG("dimm_sm called with state 0x%02X (tick=%d)", i_state, DIMM_TICK); if (!async_request_is_idle(&G_dimm_sm_request.request)) { INTR_TRAC_ERR("dimm_sm: request is not idle."); } else { switch(i_state) { // Init case DIMM_STATE_INIT: break; // Read DIMM temp case DIMM_STATE_WRITE_MODE: case DIMM_STATE_WRITE_ADDR: case DIMM_STATE_INITIATE_READ: case DIMM_STATE_READ_TEMP: break; // I2C reset case DIMM_STATE_RESET_MASTER: case DIMM_STATE_RESET_SLAVE_P0: case DIMM_STATE_RESET_SLAVE_P0_COMPLETE: case DIMM_STATE_RESET_SLAVE_P1: case DIMM_STATE_RESET_SLAVE_P1_COMPLETE: break; default: INTR_TRAC_ERR("dimm_sm: Invalid state (0x%02X)", i_state); errlHndl_t err = NULL; /* * @errortype * @moduleid DIMM_MID_DIMM_SM * @reasoncode DIMM_INVALID_STATE * @userdata1 DIMM state * @userdata2 0 * @devdesc Invalid DIMM I2C state requested */ err = createErrl(DIMM_MID_DIMM_SM, DIMM_INVALID_STATE, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, NULL, DEFAULT_TRACE_SIZE, i_state, 0); // Request reset since this should never happen. 
REQUEST_RESET(err); scheduleRequest = false; break; } if (scheduleRequest) { // Clear errors and init common arguments for GPE G_dimm_sm_args.error.error = 0; G_dimm_sm_args.state = i_state; DIMM_DBG("dimm_sm: Scheduling GPE1 DIMM I2C state 0x%02X (tick %d)", i_state, DIMM_TICK); int l_rc = gpe_request_schedule(&G_dimm_sm_request); if (0 == l_rc) { l_scheduled = true; } else { errlHndl_t l_err = NULL; INTR_TRAC_ERR("dimm_sm: schedule failed w/rc=0x%08X (%d us)", l_rc, (int) ((ssx_timebase_get())/(SSX_TIMEBASE_FREQUENCY_HZ/1000000))); /* * @errortype * @moduleid DIMM_MID_DIMM_SM * @reasoncode SSX_GENERIC_FAILURE * @userdata1 GPE shedule returned rc code * @userdata2 state * @devdesc dimm_sm schedule failed */ l_err = createErrl(DIMM_MID_DIMM_SM, SSX_GENERIC_FAILURE, ERC_DIMM_SCHEDULE_FAILURE, ERRL_SEV_PREDICTIVE, NULL, DEFAULT_TRACE_SIZE, l_rc, i_state); // Request reset since this should never happen. REQUEST_RESET(l_err); } } } return l_scheduled; } // end schedule_dimm_req()
// Check and update lock ownership for the specified i2c engine. // Returns true if OCC owns the lock, or false if host owns lock // // If host has requesed the i2c lock, it will be released and an external interrupt // will be generated/queued and function will return false. // If the host has not released the lock, function will return false. // If the host cleared its lock bit, OCC will take back ownership and return true. // bool check_and_update_i2c_lock(const uint8_t i_engine) { bool occ_owns_lock = true; if ((PIB_I2C_ENGINE_E == i_engine) || (PIB_I2C_ENGINE_D == i_engine) || (PIB_I2C_ENGINE_C == i_engine)) { bool needRetry = false; do { ocb_occflg_t original_occflags; original_occflags.value = in32(OCB_OCCFLG); LOCK_DBG("check_and_update_i2c_lock: I2C engine %d - host=%d, occ=%d (dimmTick=%d)", i_engine, original_occflags.fields.i2c_engine3_lock_host, original_occflags.fields.i2c_engine3_lock_occ, DIMM_TICK); if (occ_owns_i2c_lock(original_occflags, i_engine)) { if (host_wants_i2c_lock(original_occflags, i_engine)) { // Host requested lock, clear the OCC lock and notify host update_i2c_lock(LOCK_RELEASE, i_engine); occ_owns_lock = false; } // else OCC already owns the lock } else { // OCC does not own the lock occ_owns_lock = false; if (false == host_wants_i2c_lock(original_occflags, i_engine)) { // Host is not requesting the lock, acquire lock for OCC update_i2c_lock(LOCK_ACQUIRE, i_engine); occ_owns_lock = true; } // else Host still holds the lock } if ((occ_owns_lock) && (original_occflags.fields.i2c_engine1_lock_host == 0) && (original_occflags.fields.i2c_engine1_lock_occ == 0)) { // If neither lock bit is set, we must read back the register to make // sure the host did not set at same time (lock conflict) ocb_occflg_t verify_occflags; verify_occflags.value = in32(OCB_OCCFLG); if (host_wants_i2c_lock(verify_occflags, i_engine)) { // Host wrote their lock bit at same time, clear OCC lock and notify host update_i2c_lock(LOCK_RELEASE, i_engine); 
occ_owns_lock = false; } else { if (false == occ_owns_i2c_lock(verify_occflags, i_engine)) { // ERROR - OCC OWNERSHIP BIT DID NOT GET SET INTR_TRAC_ERR("check_and_update_i2c_lock: I2C lock bit did not get set (OCCFLAGS reg: 0x%08X)", verify_occflags.value); if (needRetry) { // After one retry, log error and goto safe /* * @errortype * @moduleid I2C_LOCK_UPDATE * @reasoncode OCI_WRITE_FAILURE * @userdata1 I2C engine number * @userdata2 OCC Flags register * @devdesc OCI write failure setting I2C ownership bit */ errlHndl_t err = createErrl(I2C_LOCK_UPDATE, OCI_WRITE_FAILURE, OCC_NO_EXTENDED_RC, ERRL_SEV_PREDICTIVE, NULL, DEFAULT_TRACE_SIZE, i_engine, verify_occflags.value); //Callout firmware addCalloutToErrl(err, ERRL_CALLOUT_TYPE_COMPONENT_ID, ERRL_COMPONENT_ID_FIRMWARE, ERRL_CALLOUT_PRIORITY_MED); //Callout processor addCalloutToErrl(err, ERRL_CALLOUT_TYPE_HUID, G_sysConfigData.proc_huid, ERRL_CALLOUT_PRIORITY_LOW); REQUEST_RESET(err); occ_owns_lock = false; break; } needRetry = true; } // else verify succeeded (OCC owns lock) } } } while (needRetry); } else { // Invalid engine INTR_TRAC_ERR("check_and_update_i2c_lock: Invalid engine specified: 0x%02X", i_engine); } return occ_owns_lock; } // end check_and_update_i2c_lock()