int servant(const char *diskname, int mode, const void* argp) { struct sector_mbox_s *s_mbox = NULL; struct sector_node_s *s_node = NULL; struct sector_header_s *s_header = NULL; int mbox; int rc = 0; time_t t0, t1, latency; union sigval signal_value; sigset_t servant_masks; struct sbd_context *st; pid_t ppid; char uuid[37]; const struct servants_list_item *s = argp; if (!diskname) { cl_log(LOG_ERR, "Empty disk name %s.", diskname); return -1; } cl_log(LOG_INFO, "Servant starting for device %s", diskname); /* Block most of the signals */ sigfillset(&servant_masks); sigdelset(&servant_masks, SIGKILL); sigdelset(&servant_masks, SIGFPE); sigdelset(&servant_masks, SIGILL); sigdelset(&servant_masks, SIGSEGV); sigdelset(&servant_masks, SIGBUS); sigdelset(&servant_masks, SIGALRM); /* FIXME: check error */ sigprocmask(SIG_SETMASK, &servant_masks, NULL); atexit(servant_exit); servant_inform_parent = 1; st = open_device(diskname, LOG_WARNING); if (!st) { return -1; } s_header = header_get(st); if (!s_header) { cl_log(LOG_ERR, "Not a valid header on %s", diskname); return -1; } if (servant_check_timeout_inconsistent(s_header) < 0) { cl_log(LOG_ERR, "Timeouts on %s do not match first device", diskname); return -1; } if (s_header->minor_version > 0) { uuid_unparse_lower(s_header->uuid, uuid); cl_log(LOG_INFO, "Device %s uuid: %s", diskname, uuid); } mbox = slot_allocate(st, local_uname); if (mbox < 0) { cl_log(LOG_ERR, "No slot allocated, and automatic allocation failed for disk %s.", diskname); rc = -1; goto out; } s_node = sector_alloc(); if (slot_read(st, mbox, s_node) < 0) { cl_log(LOG_ERR, "Unable to read node entry on %s", diskname); exit(1); } DBGLOG(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname); if (s_header->minor_version == 0) { set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox); } else { set_proc_title("sbd: watcher: %s - slot: %d - uuid: %s", diskname, mbox, uuid); } s_mbox = sector_alloc(); if (s->first_start) { if (mode > 0) { if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed during start-up in servant."); rc = -1; goto out; } if (s_mbox->cmd != SBD_MSG_EXIT && s_mbox->cmd != SBD_MSG_EMPTY) { /* Not a clean stop. Abort start-up */ cl_log(LOG_WARNING, "Found fencing message - aborting start-up. Manual intervention required!"); ppid = getppid(); sigqueue(ppid, SIG_EXITREQ, signal_value); rc = 0; goto out; } } DBGLOG(LOG_INFO, "First servant start - zeroing inbox"); memset(s_mbox, 0, sizeof(*s_mbox)); if (mbox_write(st, mbox, s_mbox) < 0) { rc = -1; goto out; } } memset(&signal_value, 0, sizeof(signal_value)); while (1) { struct sector_header_s *s_header_retry = NULL; struct sector_node_s *s_node_retry = NULL; t0 = time(NULL); sleep(timeout_loop); ppid = getppid(); if (ppid == 1) { /* Our parent died unexpectedly. Triggering * self-fence. */ do_reset(); } /* These attempts are, by definition, somewhat racy. If * the device is wiped out or corrupted between here and * us reading our mbox, there is nothing we can do about * that. But at least we tried. */ s_header_retry = header_get(st); if (!s_header_retry) { cl_log(LOG_ERR, "No longer found a valid header on %s", diskname); exit(1); } if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) { cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname); exit(1); } free(s_header_retry); s_node_retry = sector_alloc(); if (slot_read(st, mbox, s_node_retry) < 0) { cl_log(LOG_ERR, "slot read failed in servant."); exit(1); } if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) { cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname); exit(1); } free(s_node_retry); if (mbox_read(st, mbox, s_mbox) < 0) { cl_log(LOG_ERR, "mbox read failed in servant."); exit(1); } if (s_mbox->cmd > 0) { cl_log(LOG_INFO, "Received command %s from %s on disk %s", char2cmd(s_mbox->cmd), s_mbox->from, diskname); switch (s_mbox->cmd) { case SBD_MSG_TEST: memset(s_mbox, 0, sizeof(*s_mbox)); mbox_write(st, mbox, s_mbox); sigqueue(ppid, SIG_TEST, signal_value); break; case SBD_MSG_RESET: do_reset(); break; case SBD_MSG_OFF: do_off(); break; case SBD_MSG_EXIT: sigqueue(ppid, SIG_EXITREQ, signal_value); break; case SBD_MSG_CRASHDUMP: do_crashdump(); break; default: /* FIXME: An "unknown" message might result from a partial write. log it and clear the slot. */ cl_log(LOG_ERR, "Unknown message on disk %s", diskname); memset(s_mbox, 0, sizeof(*s_mbox)); mbox_write(st, mbox, s_mbox); break; } } sigqueue(ppid, SIG_LIVENESS, signal_value); t1 = time(NULL); latency = t1 - t0; if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) { cl_log(LOG_WARNING, "Latency: %d exceeded threshold %d on disk %s", (int)latency, (int)timeout_watchdog_warn, diskname); } else if (debug) { DBGLOG(LOG_INFO, "Latency: %d on disk %s", (int)latency, diskname); } } out: free(s_mbox); close_device(st); if (rc == 0) { servant_inform_parent = 0; } return rc; }
/* ** This function must a send reply from at least one node, otherwise ** the requesting fence_virtd will block forever in wait_cpt_reply. */ static void do_real_work(void *data, size_t len, uint32_t nodeid, uint32_t seqno) { struct cpg_info *info = cpg_virt_handle; struct cpg_fence_req *req = data; struct cpg_fence_req reply; int reply_code = -1; virt_state_t *vs = NULL; int cur_state; uint32_t cur_owner = 0; int local = 0; uint32_t my_id, high_id; dbg_printf(2, "Request %d for VM %s\n", req->request, req->vm_name); if (cpg_get_ids(&my_id, &high_id) == -1) { syslog(LOG_WARNING, "Unable to get CPG IDs"); printf("Should never happen: Can't get CPG node ids - can't proceed\n"); return; } memcpy(&reply, req, sizeof(reply)); pthread_mutex_lock(&local_vm_list_lock); update_local_vms(info); if (strlen(req->vm_name)) { if (use_uuid) vs = vl_find_uuid(local_vm_list, req->vm_name); else vs = vl_find_name(local_vm_list, req->vm_name); if (vs) { local = 1; cur_owner = vs->v_state.s_owner; cur_state = vs->v_state.s_state; dbg_printf(2, "Found VM %s locally state %d\n", req->vm_name, cur_state); } } pthread_mutex_unlock(&local_vm_list_lock); if (vs == NULL) { pthread_mutex_lock(&remote_vm_list_lock); if (strlen(req->vm_name)) { if (use_uuid) vs = vl_find_uuid(remote_vm_list, req->vm_name); else vs = vl_find_name(remote_vm_list, req->vm_name); if (vs) { cur_owner = vs->v_state.s_owner; cur_state = vs->v_state.s_state; dbg_printf(2, "Found VM %s remotely on %u state %d\n", req->vm_name, cur_owner, cur_state); } } pthread_mutex_unlock(&remote_vm_list_lock); } if (!vs) { /* ** We know about all domains on all nodes in the CPG group. ** If we didn't find it, and we're high ID, act on the request. ** We can safely assume the VM is OFF because it wasn't found ** on any current members of the CPG group. */ if (my_id == high_id) { if (req->request == FENCE_STATUS) reply_code = RESP_OFF; else if (req->request == FENCE_OFF || req->request == FENCE_REBOOT) reply_code = RESP_SUCCESS; else reply_code = 1; dbg_printf(2, "Acting on request %d for unknown domain %s -> %d\n", req->request, req->vm_name, reply_code); goto out; } dbg_printf(2, "Not acting on request %d for unknown domain %s\n", req->request, req->vm_name); return; } if (local) { if (req->request == FENCE_STATUS) { /* We already have the status */ if (cur_state == VIR_DOMAIN_SHUTOFF) reply_code = RESP_OFF; else reply_code = RESP_SUCCESS; } else if (req->request == FENCE_OFF) { reply_code = do_off(info, req->vm_name); } else if (req->request == FENCE_ON) { reply_code = do_on(info, req->vm_name); } else if (req->request == FENCE_REBOOT) { reply_code = do_reboot(info, req->vm_name); } else { dbg_printf(2, "Not explicitly handling request type %d for %s\n", req->request, req->vm_name); reply_code = 0; } goto out; } /* ** This is a request for a non-local domain that exists on a ** current CPG group member, so that member will see the request ** and act on it. We don't need to do anything. */ dbg_printf(2, "Nothing to do for non-local domain %s seq %d owner %u\n", req->vm_name, seqno, cur_owner); return; out: dbg_printf(2, "[%s] sending reply code seq %d -> %d\n", req->vm_name, seqno, reply_code); reply.response = reply_code; if (cpg_send_reply(&reply, sizeof(reply), nodeid, seqno) < 0) { dbg_printf(2, "cpg_send_reply failed for %s [%d %d]: %s\n", req->vm_name, nodeid, seqno, strerror(errno)); } }