void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
	int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock);

	if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
		log_debug(ls, "ignoring recovery message %x from %d",
			  rc->rc_type, nodeid);
		goto out;
	}

	if (is_old_reply(ls, rc))
		goto out;

	switch (rc->rc_type) {
	case DLM_RCOM_STATUS:
		receive_rcom_status(ls, rc);
		break;

	case DLM_RCOM_NAMES:
		receive_rcom_names(ls, rc);
		break;

	case DLM_RCOM_LOOKUP:
		receive_rcom_lookup(ls, rc);
		break;

	case DLM_RCOM_LOCK:
		if (rc->rc_header.h_length < lock_size)
			goto Eshort;
		receive_rcom_lock(ls, rc);
		break;

	case DLM_RCOM_STATUS_REPLY:
		receive_sync_reply(ls, rc);
		break;

	case DLM_RCOM_NAMES_REPLY:
		receive_sync_reply(ls, rc);
		break;

	case DLM_RCOM_LOOKUP_REPLY:
		receive_rcom_lookup_reply(ls, rc);
		break;

	case DLM_RCOM_LOCK_REPLY:
		if (rc->rc_header.h_length < lock_size)
			goto Eshort;
		dlm_recover_process_copy(ls, rc);
		break;

	default:
		log_error(ls, "receive_rcom bad type %d", rc->rc_type);
	}
 out:
	return;

Eshort:
	log_error(ls, "recovery message %x from %d is too short",
		  rc->rc_type, nodeid);
}
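/*
 * Illustrative sketch (not from the DLM sources): the DLM_RCOM_LOCK and
 * DLM_RCOM_LOCK_REPLY cases above validate the sender's declared header
 * length against the minimum size of a lock message before touching the
 * payload.  The stand-alone program below shows the same guard pattern
 * with hypothetical types; the names msg_header, lock_payload and
 * handle_lock are assumptions used for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

struct msg_header {
	uint16_t h_length;	/* total length claimed by the sender */
	uint16_t h_type;
};

struct lock_payload {
	uint32_t lkid;
	uint32_t mode;
};

/* Reject messages whose declared length cannot hold the fixed header
   plus a lock payload, or whose claim exceeds what was received. */
static int handle_lock(const void *buf, size_t recv_len)
{
	const struct msg_header *hd = buf;
	const size_t lock_size = sizeof(struct msg_header) +
				 sizeof(struct lock_payload);

	if (recv_len < sizeof(*hd) || hd->h_length < lock_size ||
	    hd->h_length > recv_len) {
		fprintf(stderr, "lock message too short\n");
		return -1;
	}

	const struct lock_payload *lp = (const void *)(hd + 1);
	printf("lkid %u mode %u\n", (unsigned)lp->lkid, (unsigned)lp->mode);
	return 0;
}

int main(void)
{
	struct {
		struct msg_header hd;
		struct lock_payload lp;
	} msg = {
		.hd = { .h_length = sizeof(msg), .h_type = 1 },
		.lp = { .lkid = 42, .mode = 3 },
	};

	return handle_lock(&msg, sizeof(msg));
}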
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
	if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
		log_debug(ls, "ignoring recovery message %x from %d",
			  rc->rc_type, nodeid);
		goto out;
	}

	if (is_old_reply(ls, rc))
		goto out;

	switch (rc->rc_type) {
	case DLM_RCOM_STATUS:
		receive_rcom_status(ls, rc);
		break;

	case DLM_RCOM_NAMES:
		receive_rcom_names(ls, rc);
		break;

	case DLM_RCOM_LOOKUP:
		receive_rcom_lookup(ls, rc);
		break;

	case DLM_RCOM_LOCK:
		receive_rcom_lock(ls, rc);
		break;

	case DLM_RCOM_STATUS_REPLY:
		receive_rcom_status_reply(ls, rc);
		break;

	case DLM_RCOM_NAMES_REPLY:
		receive_rcom_names_reply(ls, rc);
		break;

	case DLM_RCOM_LOOKUP_REPLY:
		receive_rcom_lookup_reply(ls, rc);
		break;

	case DLM_RCOM_LOCK_REPLY:
		receive_rcom_lock_reply(ls, rc);
		break;

	default:
		DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
	}
 out:
	return;
}
static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
{
	unsigned long start;
	int error, neg = 0;

	log_debug(ls, "recover %llx", (unsigned long long)rv->seq);

	mutex_lock(&ls->ls_recoverd_active);

	/*
	 * Suspending and resuming dlm_astd ensures that no lkb's from this ls
	 * will be processed by dlm_astd during recovery.
	 */

	dlm_astd_suspend();
	dlm_astd_resume();

	/*
	 * Free non-master tossed rsb's.  Master rsb's are kept on toss
	 * list and put on root list to be included in resdir recovery.
	 */

	dlm_clear_toss_list(ls);

	/*
	 * This list of root rsb's will be the basis of most of the recovery
	 * routines.
	 */

	dlm_create_root_list(ls);

	/*
	 * Add or remove nodes from the lockspace's ls_nodes list.
	 * Also waits for all nodes to complete dlm_recover_members.
	 */

	error = dlm_recover_members(ls, rv, &neg);
	if (error) {
		log_debug(ls, "recover_members failed %d", error);
		goto fail;
	}

	start = jiffies;

	/*
	 * Rebuild our own share of the directory by collecting from all other
	 * nodes their master rsb names that hash to us.
	 */

	error = dlm_recover_directory(ls);
	if (error) {
		log_debug(ls, "recover_directory failed %d", error);
		goto fail;
	}

	/*
	 * Wait for all nodes to complete directory rebuild.
	 */

	error = dlm_recover_directory_wait(ls);
	if (error) {
		log_debug(ls, "recover_directory_wait failed %d", error);
		goto fail;
	}

	/*
	 * We may have outstanding operations that are waiting for a reply from
	 * a failed node.  Mark these to be resent after recovery.  Unlock and
	 * cancel ops can just be completed.
	 */

	dlm_recover_waiters_pre(ls);

	error = dlm_recovery_stopped(ls);
	if (error)
		goto fail;

	if (neg || dlm_no_directory(ls)) {
		/*
		 * Clear lkb's for departed nodes.
		 */

		dlm_purge_locks(ls);

		/*
		 * Get new master nodeid's for rsb's that were mastered on
		 * departed nodes.
		 */

		error = dlm_recover_masters(ls);
		if (error) {
			log_debug(ls, "recover_masters failed %d", error);
			goto fail;
		}

		/*
		 * Send our locks on remastered rsb's to the new masters.
		 */

		error = dlm_recover_locks(ls);
		if (error) {
			log_debug(ls, "recover_locks failed %d", error);
			goto fail;
		}

		error = dlm_recover_locks_wait(ls);
		if (error) {
			log_debug(ls, "recover_locks_wait failed %d", error);
			goto fail;
		}

		/*
		 * Finalize state in master rsb's now that all locks can be
		 * checked.  This includes conversion resolution and lvb
		 * settings.
		 */

		dlm_recover_rsbs(ls);
	} else {
		/*
		 * Other lockspace members may be going through the "neg" steps
		 * while also adding us to the lockspace, in which case they'll
		 * be doing the recover_locks (RS_LOCKS) barrier.
		 */
		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls);
		if (error) {
			log_debug(ls, "recover_locks_wait failed %d", error);
			goto fail;
		}
	}

	dlm_release_root_list(ls);

	/*
	 * Purge directory-related requests that are saved in requestqueue.
	 * All dir requests from before recovery are invalid now due to the dir
	 * rebuild and will be resent by the requesting nodes.
	 */

	dlm_purge_requestqueue(ls);

	dlm_set_recover_status(ls, DLM_RS_DONE);
	error = dlm_recover_done_wait(ls);
	if (error) {
		log_debug(ls, "recover_done_wait failed %d", error);
		goto fail;
	}

	dlm_clear_members_gone(ls);

	dlm_adjust_timeouts(ls);

	error = enable_locking(ls, rv->seq);
	if (error) {
		log_debug(ls, "enable_locking failed %d", error);
		goto fail;
	}

	error = dlm_process_requestqueue(ls);
	if (error) {
		log_debug(ls, "process_requestqueue failed %d", error);
		goto fail;
	}

	error = dlm_recover_waiters_post(ls);
	if (error) {
		log_debug(ls, "recover_waiters_post failed %d", error);
		goto fail;
	}

	dlm_grant_after_purge(ls);

	dlm_astd_wake();

	log_debug(ls, "recover %llx done: %u ms",
		  (unsigned long long)rv->seq,
		  jiffies_to_msecs(jiffies - start));
	mutex_unlock(&ls->ls_recoverd_active);

	return 0;

 fail:
	dlm_release_root_list(ls);
	log_debug(ls, "recover %llx error %d",
		  (unsigned long long)rv->seq, error);
	mutex_unlock(&ls->ls_recoverd_active);
	return error;
}
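/*
 * Sketch (not from the DLM sources): several steps in ls_recover() follow
 * the same barrier pattern -- dlm_set_recover_status() marks the local node
 * as done with a recovery stage (e.g. DLM_RS_LOCKS, DLM_RS_DONE), and the
 * matching *_wait() call blocks until every lockspace member reports that
 * stage, learned through DLM_RCOM_STATUS exchanges.  The stand-alone
 * pthread program below models that pattern; the names node_state,
 * RS_LOCKS, set_recover_status and barrier_wait are hypothetical and only
 * illustrate the shape of the barrier, not the kernel implementation.
 */
#include <pthread.h>
#include <stdio.h>

#define NODES    3
#define RS_LOCKS 0x1	/* "locks recovered" stage bit */

static unsigned int node_state[NODES];
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

/* Mark this node as having completed the given stage. */
static void set_recover_status(int node, unsigned int stage)
{
	pthread_mutex_lock(&lock);
	node_state[node] |= stage;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

/* Block until every node has reported the given stage. */
static void barrier_wait(unsigned int stage)
{
	pthread_mutex_lock(&lock);
	for (;;) {
		int i, all = 1;

		for (i = 0; i < NODES; i++)
			if (!(node_state[i] & stage))
				all = 0;
		if (all)
			break;
		pthread_cond_wait(&cond, &lock);
	}
	pthread_mutex_unlock(&lock);
}

static void *node_thread(void *arg)
{
	int node = (int)(long)arg;

	/* ... per-node lock recovery work would happen here ... */
	set_recover_status(node, RS_LOCKS);
	barrier_wait(RS_LOCKS);
	printf("node %d: all members finished RS_LOCKS\n", node);
	return NULL;
}

int main(void)
{
	pthread_t t[NODES];
	long i;

	for (i = 0; i < NODES; i++)
		pthread_create(&t[i], NULL, node_thread, (void *)i);
	for (i = 0; i < NODES; i++)
		pthread_join(t[i], NULL);
	return 0;
}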
void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
{
	struct dlm_rcom *rc = (struct dlm_rcom *) hd;
	struct dlm_ls *ls;

	dlm_rcom_in(rc);

	/* If the lockspace doesn't exist then still send a status message
	   back; it's possible that it just doesn't have its global_id yet. */

	ls = dlm_find_lockspace_global(hd->h_lockspace);
	if (!ls) {
		log_print("lockspace %x from %d type %x not found",
			  hd->h_lockspace, nodeid, rc->rc_type);
		if (rc->rc_type == DLM_RCOM_STATUS)
			send_ls_not_ready(nodeid, rc);
		return;
	}

	if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
		log_debug(ls, "ignoring recovery message %x from %d",
			  rc->rc_type, nodeid);
		goto out;
	}

	if (is_old_reply(ls, rc))
		goto out;

	if (nodeid != rc->rc_header.h_nodeid) {
		log_error(ls, "bad rcom nodeid %d from %d",
			  rc->rc_header.h_nodeid, nodeid);
		goto out;
	}

	switch (rc->rc_type) {
	case DLM_RCOM_STATUS:
		receive_rcom_status(ls, rc);
		break;

	case DLM_RCOM_NAMES:
		receive_rcom_names(ls, rc);
		break;

	case DLM_RCOM_LOOKUP:
		receive_rcom_lookup(ls, rc);
		break;

	case DLM_RCOM_LOCK:
		receive_rcom_lock(ls, rc);
		break;

	case DLM_RCOM_STATUS_REPLY:
		receive_rcom_status_reply(ls, rc);
		break;

	case DLM_RCOM_NAMES_REPLY:
		receive_rcom_names_reply(ls, rc);
		break;

	case DLM_RCOM_LOOKUP_REPLY:
		receive_rcom_lookup_reply(ls, rc);
		break;

	case DLM_RCOM_LOCK_REPLY:
		receive_rcom_lock_reply(ls, rc);
		break;

	default:
		DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
	}
 out:
	/* drop the reference taken by dlm_find_lockspace_global() */
	dlm_put_lockspace(ls);
}