/*
 * RxE/FRx error pairs may carry inaccurate DIMM (resource) FMRIs, since
 * not enough information was available before the pair was correlated.
 * Once the DE has completed the pair, it calls this routine to obtain the
 * correct FMRI.
 *
 * The FMRI is derived from the AFAR/AFSR/syndrome via cmd_mem_fmri_derive()
 * and then expanded.  Returns the expanded FMRI, or NULL if the FMRI could
 * not be derived or expanded (on an expansion failure the derived nvlist is
 * freed here).
 */
nvlist_t *
cmd_dimm_fmri_derive(fmd_hdl_t *hdl, uint64_t afar, uint16_t synd,
    uint64_t afsr)
{
	nvlist_t *derived = cmd_mem_fmri_derive(hdl, afar, afsr, synd);

	if (derived != NULL && fmd_nvl_fmri_expand(hdl, derived) < 0) {
		nvlist_free(derived);
		derived = NULL;
	}

	return (derived);
}
/*
 * Build a mem FMRI from the DIMM unum and serial ids, expand it, and create
 * a DIMM from it with cmd_dimm_create().
 *
 * The temporary FMRI is released on every path.  Returns the new DIMM, or
 * NULL if the FMRI could not be created or expanded, or if cmd_dimm_create()
 * failed.
 */
static cmd_dimm_t *
branch_dimm_create(fmd_hdl_t *hdl, char *dimm_unum, char **serids,
    size_t nserids)
{
	cmd_dimm_t *created = NULL;
	nvlist_t *mem_fmri;

	mem_fmri = cmd_mem_fmri_create(dimm_unum, serids, nserids);

	if (mem_fmri != NULL && fmd_nvl_fmri_expand(hdl, mem_fmri) == 0)
		created = cmd_dimm_create(hdl, mem_fmri);

	nvlist_free(mem_fmri);
	return (created);
}
/*
 * Common handler for memory UEs with EID=MEM.  The error may have been
 * detected by either a CPU or an IO device; hdlr_type says which, and
 * determines where the physical address is read from:
 *
 *   CMD_OPL_HDLR_CPU - the SFAR payload member
 *   CMD_OPL_HDLR_IO  - the UBC UE log register, masked with UBC_UE_ADR_MASK
 *
 * Returns:
 *   CMD_EVD_BAD    - the resource or address member is missing, hdlr_type
 *                    is unknown, or the ASRU/FRU could not be built
 *   CMD_EVD_REDUND - the page at this address already has a solved case
 *   CMD_EVD_OK     - a page fault was issued via cmd_page_fault()
 */
cmd_evdisp_t
opl_ue_mem(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, int hdlr_type)
{
	nvlist_t *resource = NULL;
	nvlist_t *asru_copy = NULL;
	nvlist_t *fru_nvl = NULL;
	uint64_t ue_log_reg;
	uint64_t paddr;
	cmd_page_t *pg;
	cmd_evdisp_t disp = CMD_EVD_BAD;

	if (nvlist_lookup_nvlist(nvl, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
	    &resource) != 0)
		return (CMD_EVD_BAD);

	/* Extract the physical address according to the detector type. */
	if (hdlr_type == CMD_OPL_HDLR_CPU) {
		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_SFAR,
		    &paddr) != 0)
			return (CMD_EVD_BAD);

		fmd_hdl_debug(hdl, "cmd_ue_mem: pa=%llx\n",
		    (u_longlong_t)paddr);
	} else if (hdlr_type == CMD_OPL_HDLR_IO) {
		if (nvlist_lookup_uint64(nvl, OBERON_UBC_MUE,
		    &ue_log_reg) != 0)
			return (CMD_EVD_BAD);

		paddr = (ue_log_reg & UBC_UE_ADR_MASK);

		fmd_hdl_debug(hdl, "cmd_ue_mem: ue_log_reg=%llx\n",
		    (u_longlong_t)ue_log_reg);
		fmd_hdl_debug(hdl, "cmd_ue_mem: pa=%llx\n",
		    (u_longlong_t)paddr);
	} else {
		return (CMD_EVD_BAD);
	}

	/* Nothing more to do if this page's case has already been solved. */
	pg = cmd_page_lookup(paddr);
	if (pg != NULL && pg->page_case.cc_cp != NULL &&
	    fmd_case_solved(hdl, pg->page_case.cc_cp))
		return (CMD_EVD_REDUND);

	/* Work on a private, expanded copy of the resource as the ASRU. */
	if (nvlist_dup(resource, &asru_copy, 0) != 0) {
		fmd_hdl_debug(hdl, "opl_ue_mem nvlist dup failed\n");
		return (CMD_EVD_BAD);
	}

	if (fmd_nvl_fmri_expand(hdl, asru_copy) < 0) {
		CMD_STAT_BUMP(bad_mem_asru);
		goto done;
	}

	fru_nvl = opl_mem_fru_create(hdl, asru_copy);
	if (fru_nvl != NULL) {
		cmd_page_fault(hdl, asru_copy, fru_nvl, ep, paddr);
		nvlist_free(fru_nvl);
		disp = CMD_EVD_OK;
	}

done:
	nvlist_free(asru_copy);
	return (disp);
}
/*
 * Create a new DIMM state structure for the memory described by `asru',
 * append it to the global DIMM list (cmd.cmd_dimms), and mark it dirty.
 *
 * hdl  - module handle
 * asru - ASRU FMRI of the DIMM; it is only read here (the DIMM gets its own
 *        FMRI built from the unum and, on sun4v, the serial ids)
 *
 * Returns the new DIMM, or NULL if:
 *   - the ASRU is not present (the ereport is treated as stale),
 *   - the ASRU has no unum,
 *   - (sun4v only) the ASRU carries no serial ids, or
 *   - the DIMM FMRI cannot be created or expanded.
 */
cmd_dimm_t *
cmd_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru)
{
	cmd_dimm_t *dimm;
	const char *unum;
	nvlist_t *fmri;
	size_t nserids = 0;
	char **serids = NULL;

	if (!fmd_nvl_fmri_present(hdl, asru)) {
		fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n");
		return (NULL);
	}

	if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
		CMD_STAT_BUMP(bad_mem_asru);
		return (NULL);
	}

#ifdef sun4v
	if (nvlist_lookup_string_array(asru, FM_FMRI_HC_SERIAL_ID, &serids,
	    &nserids) != 0) {
		fmd_hdl_debug(hdl, "sun4v mem: FMRI does not"
		    " have serial_ids\n");
		CMD_STAT_BUMP(bad_mem_asru);
		return (NULL);
	}
#endif

	/*
	 * cmd_mem_fmri_create() can fail (branch_dimm_create() guards against
	 * a NULL result), so bail out here rather than hand a NULL nvlist to
	 * fmd_nvl_fmri_expand().
	 */
	fmri = cmd_mem_fmri_create(unum, serids, nserids);
	if (fmri == NULL) {
		fmd_hdl_debug(hdl, "dimm_create: failed to create FMRI "
		    "for DIMM %s\n", unum);
		return (NULL);
	}

	if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
		CMD_STAT_BUMP(bad_mem_asru);
		nvlist_free(fmri);
		return (NULL);
	}

	fmd_hdl_debug(hdl, "dimm_create: creating new DIMM %s\n", unum);
	CMD_STAT_BUMP(dimm_creat);

	dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
	dimm->dimm_nodetype = CMD_NT_DIMM;
	dimm->dimm_version = CMD_DIMM_VERSION;

	cmd_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname),
	    "dimm_%s", unum);
	cmd_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", unum);

	/* The local FMRI is not referenced again in this function. */
	nvlist_free(fmri);

	/*
	 * Point dimm_unum at the unum string held in the DIMM's own ASRU
	 * nvlist.  The lookup result is deliberately ignored.
	 */
	(void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_MEM_UNUM,
	    (char **)&dimm->dimm_unum);

	dimm_attach_to_bank(hdl, dimm);

	cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_unum, 0,
	    CMD_DIMM_STAT_PREFIX);

	cmd_list_append(&cmd.cmd_dimms, dimm);
	cmd_dimm_dirty(hdl, dimm);

	return (dimm);
}
/*
 * Retire (or, when `repair' is set, unretire) the page named by a fault.
 *
 * hdl    - module handle
 * nvl    - fault nvlist; its FM_FAULT_RESOURCE member, if any, is used both
 *          to find the physical address and for the topo-based (un)retire
 * asru   - page ASRU; a private copy is expanded and used for the legacy
 *          (un)retire path
 * uuid   - optional UUID saved with a page that is queued for retry
 * repair - B_TRUE to unretire, B_FALSE to retire
 *
 * Returns CMA_RA_SUCCESS if the operation completed, if a retire was
 * overtaken by events (ASRU no longer present), or if an unretire was
 * suppressed by configuration.  Returns CMA_RA_FAILURE otherwise, which
 * includes a suppressed retire and a retire that did not complete
 * immediately; in the latter case the page is queued on cma.cma_pages and
 * the recheck timer is restarted.
 */
/*ARGSUSED*/
int
cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
    const char *uuid, boolean_t repair)
{
	const char *verb = repair ? "unretire" : "retire";
	nvlist_t *resource = NULL;
	nvlist_t *asru_dup = NULL;
	nvlist_t *hc_specific;
	cma_page_t *pending;
	uint64_t paddr;
	int have_paddr = 0;
	int status;
	int result;

	(void) nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &resource);

	if (nvlist_dup(asru, &asru_dup, 0) != 0) {
		fmd_hdl_debug(hdl, "page retire nvlist dup failed\n");
		return (CMA_RA_FAILURE);
	}

	/* The ASRU ought to be expanded already; expand it again anyway. */
	if (fmd_nvl_fmri_expand(hdl, asru_dup) < 0) {
		fmd_hdl_debug(hdl, "failed to expand page asru\n");
		cma_stats.bad_flts.fmds_value.ui64++;
		result = CMA_RA_FAILURE;
		goto out_free;
	}

	if (!repair && !fmd_nvl_fmri_present(hdl, asru_dup)) {
		fmd_hdl_debug(hdl, "page retire overtaken by events\n");
		cma_stats.page_nonent.fmds_value.ui64++;
		result = CMA_RA_SUCCESS;
		goto out_free;
	}

	/*
	 * Work out the physical address.  Prefer the resource's hc-specific
	 * members ("asru-" physaddr first, then plain physaddr) and fall back
	 * to the physaddr recorded in the ASRU itself.
	 */
	if (resource != NULL && nvlist_lookup_nvlist(resource,
	    FM_FMRI_HC_SPECIFIC, &hc_specific) == 0) {
		if (nvlist_lookup_uint64(hc_specific,
		    "asru-" FM_FMRI_HC_SPECIFIC_PHYSADDR, &paddr) == 0 ||
		    nvlist_lookup_uint64(hc_specific,
		    FM_FMRI_HC_SPECIFIC_PHYSADDR, &paddr) == 0)
			have_paddr = 1;
	}

	if (!have_paddr && nvlist_lookup_uint64(asru_dup,
	    FM_FMRI_MEM_PHYSADDR, &paddr) != 0) {
		fmd_hdl_debug(hdl, "mem fault missing 'physaddr'\n");
		cma_stats.bad_flts.fmds_value.ui64++;
		result = CMA_RA_FAILURE;
		goto out_free;
	}

	if (repair) {
		if (!cma.cma_page_dounretire) {
			fmd_hdl_debug(hdl, "suppressed unretire of page %llx\n",
			    (u_longlong_t)paddr);
			cma_stats.page_supp.fmds_value.ui64++;
			result = CMA_RA_SUCCESS;
			goto out_free;
		}

		/* Try topo first; fall back to the legacy unretire. */
		if (resource == NULL ||
		    (status = fmd_nvl_fmri_unretire(hdl, resource)) < 0)
			status = cma_fmri_page_unretire(hdl, asru_dup);
	} else {
		if (!cma.cma_page_doretire) {
			fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
			    (u_longlong_t)paddr);
			cma_stats.page_supp.fmds_value.ui64++;
			result = CMA_RA_FAILURE;
			goto out_free;
		}

		/* Try topo first; fall back to the legacy retire. */
		if (resource == NULL ||
		    (status = fmd_nvl_fmri_retire(hdl, resource)) < 0)
			status = cma_fmri_page_retire(hdl, asru_dup);
	}

	if (status == FMD_AGENT_RETIRE_DONE) {
		fmd_hdl_debug(hdl, "%sd page 0x%llx\n", verb,
		    (u_longlong_t)paddr);
		if (repair)
			cma_stats.page_repairs.fmds_value.ui64++;
		else
			cma_stats.page_flts.fmds_value.ui64++;
		result = CMA_RA_SUCCESS;
		goto out_free;
	}

	/* Only an asynchronous retire is worth retrying later. */
	if (repair || status != FMD_AGENT_RETIRE_ASYNC) {
		fmd_hdl_debug(hdl, "%s of page 0x%llx failed, will not "
		    "retry: %s\n", verb, (u_longlong_t)paddr,
		    strerror(errno));
		cma_stats.page_fails.fmds_value.ui64++;
		result = CMA_RA_FAILURE;
		goto out_free;
	}

	/*
	 * The page did not retire immediately.  Queue it so that it can be
	 * rechecked periodically.
	 */
	fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");

	pending = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
	pending->pg_addr = paddr;
	if (resource != NULL)
		(void) nvlist_dup(resource, &pending->pg_rsrc, 0);
	pending->pg_asru = asru_dup;
	if (uuid != NULL)
		pending->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);

	pending->pg_next = cma.cma_pages;
	cma.cma_pages = pending;

	/* Restart the recheck timer at the minimum delay. */
	if (cma.cma_page_timerid != 0)
		fmd_timer_remove(hdl, cma.cma_page_timerid);

	cma.cma_page_curdelay = cma.cma_page_mindelay;
	cma.cma_page_timerid =
	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);

	/*
	 * asru_dup is deliberately not freed: the queued page now holds it
	 * and the FMRI is needed for the retry.
	 */
	return (CMA_RA_FAILURE);

out_free:
	nvlist_free(asru_dup);
	return (result);
}