/*
 * A cmd_dimm_t created for a DIMM in a branch never carries a Jxxx in its
 * unum, while a cmd_dimm_t created for a DIMM containing a page, or in a
 * bank (i.e. for ECC errors), always carries one.  The two sets are
 * therefore disjoint, and cmd_dimm_create will never link a 'branch'
 * cmd_dimm_t into a bank.  Faulting a DIMM for ECC does not prevent a
 * later FBR/FBU fault on the "same" DIMM, and vice versa.
 *
 * Walks the mem-scheme topology with branch_dimm_cb (passing the branch as
 * the callback argument) and then counts the entries found on
 * branch->branch_dimms.
 *
 * Returns the number of entries on the branch's DIMM list, or 0 when the
 * topology handle or the walker could not be obtained.
 */
static int
branch_dimmlist_create(fmd_hdl_t *hdl, cmd_branch_t *branch)
{
	topo_hdl_t *topo;
	topo_walk_t *walker;
	cmd_list_t *node;
	int err;
	int ndimms = 0;

	topo = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
	if (topo == NULL)
		return (0);

	walker = topo_walk_init(topo, FM_FMRI_SCHEME_MEM, branch_dimm_cb,
	    branch, &err);
	if (walker == NULL) {
		fmd_hdl_topo_rele(hdl, topo);
		return (0);
	}

	/*
	 * Publish the module handle in the file-scope br_hdl before
	 * stepping the walk (presumably for use by branch_dimm_cb).
	 */
	br_hdl = hdl;
	(void) topo_walk_step(walker, TOPO_WALK_CHILD);

	topo_walk_fini(walker);
	fmd_hdl_topo_rele(hdl, topo);

	/* Count whatever is now linked on the branch's DIMM list. */
	node = cmd_list_next(&branch->branch_dimms);
	while (node != NULL) {
		ndimms++;
		node = cmd_list_next(node);
	}

	return (ndimms);
}
/*
 * Search the global DIMM list (cmd.cmd_dimms) for an entry whose unum
 * string is exactly equal to the one supplied.
 *
 * Returns the matching cmd_dimm_t, or NULL if the list holds no match.
 */
static cmd_dimm_t *
dimm_lookup_by_unum(const char *unum)
{
	cmd_dimm_t *cur = cmd_list_next(&cmd.cmd_dimms);

	/* Advance until we run off the list or hit an exact unum match. */
	while (cur != NULL && strcmp(cur->dimm_unum, unum) != 0)
		cur = cmd_list_next(cur);

	return (cur);
}
/*
 * Attach a DIMM to the first bank on the global bank list (cmd.cmd_banks)
 * whose ASRU contains the DIMM's ASRU, as judged by fmd_nvl_fmri_contains.
 * If no bank matches, nothing is done.
 */
static void
dimm_attach_to_bank(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
{
	cmd_bank_t *cand = cmd_list_next(&cmd.cmd_banks);

	while (cand != NULL) {
		if (fmd_nvl_fmri_contains(hdl, cand->bank_asru_nvl,
		    dimm->dimm_asru_nvl)) {
			/* Only the first containing bank gets the DIMM. */
			cmd_bank_add_dimm(hdl, cand, dimm);
			break;
		}
		cand = cmd_list_next(cand);
	}
}
/*
 * Module teardown for branches: repeatedly take the head of the global
 * branch list (cmd.cmd_branches) and hand it to branch_free with
 * FMD_B_FALSE until the list is empty.
 *
 * NOTE(review): termination relies on branch_free unlinking the branch
 * from cmd.cmd_branches -- confirm against branch_free.
 */
void
cmd_branch_fini(fmd_hdl_t *hdl)
{
	cmd_branch_t *head;

	fmd_hdl_debug(hdl, "cmd_branch_fini\n");

	for (;;) {
		head = cmd_list_next(&cmd.cmd_branches);
		if (head == NULL)
			break;
		branch_free(hdl, head, FMD_B_FALSE);
	}
}
/*
 * Validate every branch on the global branch list.
 *
 * A branch whose case has already been solved is kept without looking at
 * its DIMM list.  Otherwise the branch is kept only if branch_exist()
 * reports that it still exists (i.e. at least one DIMM is present in the
 * branch); if not, the branch is destroyed.
 */
void
cmd_branch_validate(fmd_hdl_t *hdl)
{
	cmd_branch_t *cur, *succ;
	int keep;

	fmd_hdl_debug(hdl, "cmd_branch_validate\n");

	cur = cmd_list_next(&cmd.cmd_branches);
	while (cur != NULL) {
		/* Fetch the successor first: cur may be destroyed below. */
		succ = cmd_list_next(cur);

		/* branch_exist() is consulted only for unsolved cases. */
		keep = (cur->branch_case.cc_cp != NULL &&
		    fmd_case_solved(hdl, cur->branch_case.cc_cp)) ||
		    branch_exist(hdl, cur);

		if (!keep)
			cmd_branch_destroy(hdl, cur);

		cur = succ;
	}
}
/*
 * Empty a branch's DIMM membership list: unlink each cmd_branch_memb_t
 * from branch->branch_dimms and free it.  The cmd_dimm_t objects the
 * members point at are not touched here.
 */
static void
branch_dimmlist_free(fmd_hdl_t *hdl, cmd_branch_t *branch)
{
	cmd_branch_memb_t *memb;

	/* Always re-read the list head; each pass removes one member. */
	for (memb = cmd_list_next(&branch->branch_dimms); memb != NULL;
	    memb = cmd_list_next(&branch->branch_dimms)) {
		cmd_list_delete(&branch->branch_dimms, memb);
		fmd_hdl_free(hdl, memb, sizeof (*memb));
	}
}
/*
 * Detach a DIMM from a branch by removing and freeing the membership
 * record on branch->branch_dimms that points at it.  The DIMM itself is
 * not freed.  Asking to detach a DIMM that is not a member of the branch
 * is treated as a fatal module error (fmd_hdl_abort).
 */
void
cmd_branch_remove_dimm(fmd_hdl_t *hdl, cmd_branch_t *branch, cmd_dimm_t *dimm)
{
	cmd_branch_memb_t *memb;

	fmd_hdl_debug(hdl, "Detaching dimm %s from branch %s\n",
	    dimm->dimm_unum, branch->branch_unum);

	/* Locate the membership record that refers to this DIMM. */
	memb = cmd_list_next(&branch->branch_dimms);
	while (memb != NULL && memb->dimm != dimm)
		memb = cmd_list_next(memb);

	if (memb == NULL) {
		fmd_hdl_abort(hdl,
		    "Attempt to disconnect dimm from non-parent branch\n");
		return;
	}

	cmd_list_delete(&branch->branch_dimms, memb);
	fmd_hdl_free(hdl, memb, sizeof (cmd_branch_memb_t));
}
/*
 * Find the branch whose unum equals the unum carried by the given ASRU.
 *
 * If no unum can be extracted from the ASRU, the bad_mem_asru statistic
 * is bumped and NULL is returned.  NULL is also returned (after a debug
 * message) when no branch on the global list matches.
 */
cmd_branch_t *
cmd_branch_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
{
	cmd_branch_t *cur;
	const char *unum;

	unum = cmd_fmri_get_unum(asru);
	if (unum == NULL) {
		CMD_STAT_BUMP(bad_mem_asru);
		return (NULL);
	}

	cur = cmd_list_next(&cmd.cmd_branches);
	while (cur != NULL) {
		if (strcmp(cur->branch_unum, unum) == 0)
			return (cur);
		cur = cmd_list_next(cur);
	}

	fmd_hdl_debug(hdl, "cmd_branch_lookup: discarding old \n");
	return (NULL);
}
/*
 * Find the branch whose unum is exactly equal to the supplied string.
 *
 * Any unum containing a 'J' is rejected immediately: the unum of an
 * fbr/fbu DIMM does not have a J number.  Returns the matching branch, or
 * NULL when the unum is rejected or no branch matches.
 */
cmd_branch_t *
cmd_branch_lookup_by_unum(fmd_hdl_t *hdl, const char *unum)
{
	cmd_branch_t *cur;

	fmd_hdl_debug(hdl, "branch_lookup: dimm_unum %s", unum);

	/* A J number anywhere in the unum rules out a branch unum. */
	if (strstr(unum, "J") != NULL)
		return (NULL);

	cur = cmd_list_next(&cmd.cmd_branches);
	while (cur != NULL) {
		if (strcmp(cur->branch_unum, unum) == 0)
			return (cur);
		cur = cmd_list_next(cur);
	}

	fmd_hdl_debug(hdl, "branch_lookup_by_unum: no branch is found\n");
	return (NULL);
}
/*
 * Build the label of the board that hosts one of the branch's DIMMs.
 *
 * Scans the branch's DIMM list for the first DIMM whose unum contains
 * nacname and returns a newly allocated copy of that unum truncated just
 * after the matching path component (so the component's instance number
 * is included).
 *
 * The result is allocated with fmd_hdl_zalloc as exactly strlen + 1
 * bytes; the visible caller (cmd_branch_create_fault) releases it with
 * fmd_hdl_strfree.  Returns NULL if no DIMM unum contains nacname.
 */
char *
mbd_label(fmd_hdl_t *hdl, cmd_branch_t *branch, const char *nacname)
{
	cmd_branch_memb_t *bm;

	for (bm = cmd_list_next(&branch->branch_dimms); bm != NULL;
	    bm = cmd_list_next(bm)) {
		cmd_dimm_t *dimm = bm->dimm;
		const char *unum, *hit, *end;
		char *label;
		size_t len;

		/* Membership records without a DIMM cannot supply a unum. */
		if (dimm == NULL)
			continue;

		unum = dimm->dimm_unum;
		if ((hit = strstr(unum, nacname)) == NULL)
			continue;

		/*
		 * Cut at the '/' that ends the matching component.  When
		 * the component is the last one in the unum there is no
		 * such '/', so keep the whole unum instead of doing
		 * pointer arithmetic on a NULL strchr() result.
		 */
		end = strchr(hit, '/');
		len = (end != NULL) ? (size_t)(end - unum) : strlen(unum);

		label = fmd_hdl_zalloc(hdl, len + 1, FMD_SLEEP);
		(void) strncpy(label, unum, len);
		label[len] = '\0';
		return (label);
	}

	return (NULL);
}
/*
 * Tear down a DIMM completely.
 *
 * For each of the CMD_MAX_CKWDS queues in dimm->mq_root, every cmd_mq_t
 * entry is drained: its SERD engine (if named and still existing) is
 * destroyed, the engine name string is freed, and the entry is unlinked
 * and freed.  The DIMM's dimm_retstat statistic is then destroyed and the
 * DIMM is released through cmd_dimm_free with FMD_B_TRUE.
 */
void
cmd_dimm_destroy(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
{
	cmd_mq_t *ent;
	int ckwd;

	for (ckwd = 0; ckwd < CMD_MAX_CKWDS; ckwd++) {
		/* Pop entries off the head until this queue is empty. */
		for (;;) {
			ent = cmd_list_next(&dimm->mq_root[ckwd]);
			if (ent == NULL)
				break;

			if (ent->mq_serdnm != NULL) {
				if (fmd_serd_exists(hdl, ent->mq_serdnm))
					fmd_serd_destroy(hdl, ent->mq_serdnm);
				fmd_hdl_strfree(hdl, ent->mq_serdnm);
				ent->mq_serdnm = NULL;
			}

			cmd_list_delete(&dimm->mq_root[ckwd], ent);
			fmd_hdl_free(hdl, ent, sizeof (cmd_mq_t));
		}
	}

	fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat));
	cmd_dimm_free(hdl, dimm, FMD_B_TRUE);
}
/*
 * Restore the branch referenced by a case pointer and re-attach the case
 * to it.
 *
 * If a branch whose buffer name equals ptr->ptr_name is already on the
 * global branch list, it is reused.  Otherwise the branch is read back
 * from the named persistent buffer, converted according to its version
 * field, has its ASRU and unum restored, and is appended to the global
 * branch list.  Any inconsistency in the saved state (missing buffer,
 * size out of bounds, read failure, unknown version, missing unum,
 * unknown pointer subtype) is reported through fmd_hdl_abort.
 *
 * Returns the (found or newly restored) branch.
 */
void *
cmd_branch_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
{
	cmd_branch_t *branch;
	size_t branchsz;

	/* Look for an already-restored branch with the same buffer name. */
	for (branch = cmd_list_next(&cmd.cmd_branches); branch != NULL;
	    branch = cmd_list_next(branch)) {
		if (strcmp(branch->branch_bufname, ptr->ptr_name) == 0)
			break;
	}

	if (branch == NULL) {
		fmd_hdl_debug(hdl, "restoring branch from %s\n", ptr->ptr_name);

		/*
		 * Validate the saved buffer's size before reading it.
		 * NOTE(review): branchsz is a size_t but is formatted with
		 * %u below -- confirm this is safe for the build's data model.
		 */
		if ((branchsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
			fmd_hdl_abort(hdl, "branch referenced by case %s does "
			    "not exist in saved state\n",
			    fmd_case_uuid(hdl, cp));
		} else if (branchsz > CMD_BRANCH_MAXSIZE ||
		    branchsz < CMD_BRANCH_MINSIZE) {
			fmd_hdl_abort(hdl,
			    "branch buffer referenced by case %s "
			    "is out of bounds (is %u bytes, max %u, min %u)\n",
			    fmd_case_uuid(hdl, cp), branchsz,
			    CMD_BRANCH_MAXSIZE, CMD_BRANCH_MINSIZE);
		}

		if ((branch = cmd_buf_read(hdl, NULL, ptr->ptr_name,
		    branchsz)) == NULL) {
			fmd_hdl_abort(hdl, "failed to read branch buf %s",
			    ptr->ptr_name);
		}

		fmd_hdl_debug(hdl, "found %d in version field\n",
		    branch->branch_version);

		/* Convert the persisted image according to its version. */
		switch (branch->branch_version) {
		case CMD_BRANCH_VERSION_0:
			branch = branch_wrapv0(hdl,
			    (cmd_branch_pers_t *)branch, branchsz);
			break;
		default:
			fmd_hdl_abort(hdl, "unknown version (found %d) "
			    "for branch state referenced by case %s.\n",
			    branch->branch_version, fmd_case_uuid(hdl, cp));
			break;
		}

		cmd_fmri_restore(hdl, &branch->branch_asru);

		/* branch_unum is re-pointed at the unum string in the ASRU. */
		if ((errno = nvlist_lookup_string(branch->branch_asru_nvl,
		    FM_FMRI_MEM_UNUM, (char **)&branch->branch_unum)) != 0)
			fmd_hdl_abort(hdl, "failed to retrieve unum from asru");

		cmd_list_append(&cmd.cmd_branches, branch);
	}

	/* Re-attach the case; only the branch-case subtype is recognized. */
	switch (ptr->ptr_subtype) {
	case CMD_PTR_BRANCH_CASE:
		cmd_mem_case_restore(hdl, &branch->branch_case, cp, "branch",
		    branch->branch_unum);
		break;
	default:
		fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
		    ptr->ptr_name, ptr->ptr_subtype);
	}

	return (branch);
}
/*
 * Populate a branch case's suspect list with board and DIMM faults.
 *
 * For t5440, the memory channel goes like this:
 *	VF -> cpuboard -> D0 -> motherboard -> memboard -> D[1..3]
 * If there is a DIMM on the memory board, then the memory board, the
 * motherboard, the cpuboard and the DIMMs are all in the suspect list.
 * If there is no DIMM on the memory board, only the cpu board and the
 * DIMMs are in the suspect list.
 *
 * Each board's certainty is the total board certainty (CMD_BOARDS_CERT)
 * divided by the number of faulty boards in the suspect list; the
 * remainder (100 - CMD_BOARDS_CERT) is split evenly across the DIMMs
 * counted on the branch.
 */
void
cmd_branch_create_fault(fmd_hdl_t *hdl, cmd_branch_t *branch,
    const char *fltnm, nvlist_t *asru)
{
	cmd_branch_memb_t *memb;
	cmd_dimm_t *d;
	nvlist_t *suspect;
	char *label;
	char *detector_loc = NULL;
	uint_t per_board = 0;
	uint_t per_dimm = 0;
	int ndimms = 0;

	/* attach the dimms to the branch */
	ndimms = branch_dimmlist_create(hdl, branch);

	label = mbd_label(hdl, branch, "MEM");
	if (label != NULL) {
		/*
		 * Batoka with memory expansion: three boards share the
		 * board certainty (CPU, MEM, MB).  The CPU expansion board
		 * is added further down; the memory expansion board and
		 * the motherboard FRUs are added here.
		 */
		per_board = CMD_BOARDS_CERT / 3;
		add_bdflt_to_case(hdl, label, fltnm, per_board,
		    branch->branch_case.cc_cp);
		fmd_hdl_strfree(hdl, label);
		add_bdflt_to_case(hdl, "MB", fltnm, per_board,
		    branch->branch_case.cc_cp);
	} else {
		label = mbd_label(hdl, branch, "MR");
		if (label != NULL) {
			/*
			 * Maramba or similar platform with a mezzanine
			 * board: two boards share the certainty (MB, MR).
			 * The motherboard FRU is added further down; the
			 * mezzanine board is added here.
			 */
			per_board = CMD_BOARDS_CERT / 2;
			add_bdflt_to_case(hdl, label, fltnm, per_board,
			    branch->branch_case.cc_cp);
			fmd_hdl_strfree(hdl, label);
		} else {
			/* only MB or CPU */
			per_board = CMD_BOARDS_CERT;
		}
	}

	/*
	 * Add to the suspect list the FRU which contains the ereport
	 * 'detector'.  This is either a CPU expansion board (Batoka) or
	 * the motherboard (Huron, Maramba, or derivative).
	 */
	detector_loc = cmd_getfru_loc(hdl, asru);
	suspect = cmd_boardfru_create_fault(hdl, asru, fltnm, per_board,
	    detector_loc);
	if (suspect != NULL)
		fmd_case_add_suspect(hdl, branch->branch_case.cc_cp, suspect);

	/* Split the non-board certainty evenly across the DIMMs. */
	if (ndimms != 0)
		per_dimm = (100 - CMD_BOARDS_CERT) / ndimms;

	/* create dimm faults */
	for (memb = cmd_list_next(&branch->branch_dimms); memb != NULL;
	    memb = cmd_list_next(memb)) {
		d = memb->dimm;
		if (d == NULL)
			continue;

		d->dimm_flags |= CMD_MEM_F_FAULTING;
		cmd_dimm_dirty(hdl, d);
		suspect = cmd_dimm_create_fault(hdl, d, fltnm, per_dimm);
		fmd_case_add_suspect(hdl, branch->branch_case.cc_cp, suspect);
	}

	if (detector_loc != NULL)
		fmd_hdl_strfree(hdl, detector_loc);
}