예제 #1
0
/*
 * GHashTable foreach callback invoked once per peer cache entry.
 *
 * key and user_data are unused; value is the crm_node_t being visited.
 * Nothing happens for a peer that crm_is_peer_active() reports as active.
 * For an inactive peer this:
 *   - calls crm_update_peer_join() with crm_join_none,
 *   - removes the peer from fsa_election,
 *   - raises I_ERROR if the peer is this node, or I_ELECTION if the peer
 *     was our DC and we are not the DC ourselves,
 *   - re-checks the join state while in S_INTEGRATION or S_FINALIZE_JOIN,
 *   - calls fail_incompletable_actions() with the peer's uuid.
 */
static void
reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *node = value;

    if (crm_is_peer_active(node) == FALSE) {
        crm_update_peer_join(__FUNCTION__, node, crm_join_none);

        if (node && node->uname) {
            election_remove(fsa_election, node->uname);

            if (safe_str_eq(fsa_our_uname, node->uname)) {
                crm_err("We're not part of the cluster anymore");
                register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);

            } else if (AM_I_DC == FALSE && safe_str_eq(node->uname, fsa_our_dc)) {
                crm_warn("Our DC node (%s) left the cluster", node->uname);
                register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
            }
        }

        if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) {
            check_join_state(fsa_state, __FUNCTION__);
        }

        /* The check above treats node as possibly NULL, so it must not be
         * dereferenced unconditionally here either. */
        if (node != NULL) {
            fail_incompletable_actions(transition_graph, node->uuid);
        }
    }
}
예제 #2
0
/*
 * Turn the completion action of a transition graph into an FSA input.
 *
 * graph: the transition graph that just finished; its abort_reason and
 *        completion_action are reset before returning.
 *
 * Depending on graph->completion_action and the current fsa_state, this
 * either registers an FSA input (I_TE_SUCCESS, I_PE_CALC, I_STOP or
 * I_TERMINATE), restarts transition_timer, registers A_PE_INVOKE, or, when
 * no input was chosen, sets the fsa_source trigger if there is one.
 * R_IN_TRANSITION is always cleared.
 */
void
notify_crmd(crm_graph_t * graph)
{
    const char *type = "unknown";
    enum crmd_fsa_input event = I_NULL;

    crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));

    CRM_CHECK(graph->complete, graph->complete = TRUE);

    switch (graph->completion_action) {
        case tg_stop:
        case tg_done:
            /* Both actions are handled identically; only the label used in
             * the status message below differs. */
            type = (graph->completion_action == tg_stop) ? "stop" : "done";
            if (fsa_state == S_TRANSITION_ENGINE) {
                event = I_TE_SUCCESS;
            }
            break;

        case tg_restart:
            type = "restart";
            if (fsa_state == S_TRANSITION_ENGINE) {
                if (transition_timer->period_ms > 0) {
                    crm_timer_stop(transition_timer);
                    crm_timer_start(transition_timer);
                } else if (too_many_st_failures() == FALSE) {
                    event = I_PE_CALC;
                }

            } else if (fsa_state == S_POLICY_ENGINE) {
                register_fsa_action(A_PE_INVOKE);
            }
            break;

        case tg_shutdown:
            type = "shutdown";
            if (is_set(fsa_input_register, R_SHUTDOWN)) {
                event = I_STOP;

            } else {
                crm_err("We didn't ask to be shut down, yet our" " PE is telling us too.");
                event = I_TERMINATE;
            }
            break;

        default:
            /* Unrecognized action: no input is raised and the status below
             * reports it as "unknown". */
            break;
    }

    crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason));

    graph->abort_reason = NULL;
    graph->completion_action = tg_done;
    clear_bit(fsa_input_register, R_IN_TRANSITION);

    if (event != I_NULL) {
        register_fsa_input(C_FSA_INTERNAL, event, NULL);

    } else if (fsa_source) {
        mainloop_set_trigger(fsa_source);
    }
}
예제 #3
0
/*
 * A_DC_RELEASE / A_DC_RELEASED
 *
 * A_DC_RELEASE clears R_THE_DC in the FSA input register.
 * A_DC_RELEASED (only considered when A_DC_RELEASE is not set) registers
 * I_RELEASE_SUCCESS.  Any other action is logged as an error.
 * cause, cur_state, current_input and msg_data are not used here.
 */
void
do_dc_release(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    if ((action & A_DC_RELEASE) != 0) {
        crm_debug("Releasing the role of DC");
        clear_bit(fsa_input_register, R_THE_DC);

    } else if ((action & A_DC_RELEASED) == 0) {
        crm_err("Unknown action %s", fsa_action2string(action));

    } else {
        crm_info("DC role released");
        /* TODO (from the original author, never implemented): if there are
         * errors we cannot stay up unhealthy, so raise I_SHUTDOWN here, or
         * perhaps I_ERROR and go to S_RECOVER. */
        register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL);
    }

    crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO);
}
/*
  A_ELECTION_CHECKアクション
  CRM_OP_VOTEメッセージを受信してから流れる処理
  
  voteハッシュテーブルに全てのアクティブなクラスタ構成メンバーからの受信をおこなったかどうかチェックし
  全ての受信が行われている場合は、I_ELECTION_DCへ遷移する */
*/
void
do_election_check(long long action,
		       enum crmd_fsa_cause cause,
		       enum crmd_fsa_state cur_state,
		       enum crmd_fsa_input current_input,
		  fsa_data_t *msg_data)
{
	int voted_size = 0;
	
	/* 現在のアクティブなメンバー数を取得する */
	int num_members = crm_active_members();

	if(voted) {
		/* votedハッシュテーブルが存在する場合は、テーブルサイズを取得する */
	    voted_size = g_hash_table_size(voted);
	}
	/* in the case of #voted > #members, it is better to
	 *   wait for the timeout and give the cluster time to
	 *   stabilize
	 */
	if(fsa_state != S_ELECTION) {
		/* S_ELECTION状態の場合はチェックしない */
		crm_debug("Ignore election check: we not in an election");

	} else if(voted_size >= num_members) {
		/* voteハッシュテーブルのサイズ(メンバー数)が現在のアクティブなメンバー数以上の場合 */
		/* we won and everyone has voted */
		
		/* メンバーが揃ったのでelection_timeoutタイマーを止める */
		crm_timer_stop(election_timeout);
		
		/* アクティブメンバー数とvoteサイズが一致もしくは、voteサイズが大きい状態になったので、 */
		/* メンバーは揃ったので、内部メッセージにI_ELECTION_DCをセットする */
		register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
		
		if(voted_size > num_members) {
			char *data = NULL;
			
			data = crm_strdup("member");
			g_hash_table_foreach(crm_peer_cache, log_member_uname, data);
			crm_free(data);
			
			data = crm_strdup("voted");
			g_hash_table_foreach(voted, log_node, data);
			crm_free(data);
			
		}
		crm_debug("Destroying voted hash");
		/* votedハッシュテーブルを破棄する */
		g_hash_table_destroy(voted);
		voted = NULL;
		
	} else {
		crm_debug("Still waiting on %d non-votes (%d total)",
			 num_members - voted_size, num_members);
	}

	return;
}
예제 #5
0
/*
 * Timer expiry callback; data is the fsa_timer_t that fired.
 *
 * Logs the expiry, counts it for the timers listed below, stops
 * non-repeating timers, converts the timer's fsa_input into an FSA input
 * where appropriate, and finally sets the fsa_source trigger.
 * Always returns TRUE.
 */
gboolean
crm_timer_popped(gpointer data)
{
    fsa_timer_t *popped = (fsa_timer_t *) data;
    gboolean expected = FALSE;

    /* Expiry of these timers is logged at info level; anything else is an error */
    if (popped == wait_timer || popped == recheck_timer || popped == transition_timer
        || popped == finalization_timer || popped == election_trigger) {
        expected = TRUE;
    }

    if (expected) {
        crm_info("%s (%s) just popped (%dms)",
                 get_timer_desc(popped), fsa_input2string(popped->fsa_input),
                 popped->period_ms);
        popped->counter++;

    } else {
        crm_err("%s (%s) just popped in state %s! (%dms)",
                get_timer_desc(popped), fsa_input2string(popped->fsa_input),
                fsa_state2string(fsa_state), popped->period_ms);
    }

    /* More than five election trigger expiries: report it and start counting again */
    if (popped == election_trigger && election_trigger->counter > 5) {
        crm_notice("We appear to be in an election loop, something may be wrong");
        crm_write_blackbox(0, NULL);
        election_trigger->counter = 0;
    }

    if (!popped->repeat) {
        /* make it _not_ go off again */
        crm_timer_stop(popped);
    }

    if (popped->fsa_input == I_INTEGRATED) {
        crm_info("Welcomed: %d, Integrated: %d",
                 crmd_join_phase_count(crm_join_welcomed),
                 crmd_join_phase_count(crm_join_integrated));

        if (crmd_join_phase_count(crm_join_welcomed) != 0) {
            register_fsa_input_before(C_TIMER_POPPED, popped->fsa_input, NULL);

        } else {
            /* If we don't even have ourself, start again */
            register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, __FUNCTION__);
        }

    } else if ((popped == recheck_timer && fsa_state != S_IDLE)
               || (popped == finalization_timer && fsa_state != S_FINALIZE_JOIN)) {
        /* The timer fired in a state other than the one checked above */
        crm_debug("Discarding %s event in state: %s",
                  fsa_input2string(popped->fsa_input), fsa_state2string(fsa_state));

    } else if (popped->fsa_input != I_NULL) {
        register_fsa_input(C_TIMER_POPPED, popped->fsa_input, NULL);
    }

    crm_trace("Triggering FSA: %s", __FUNCTION__);
    mainloop_set_trigger(fsa_source);

    return TRUE;
}
예제 #6
0
파일: cib.c 프로젝트: sipwise/heartbeat
/*
 * Callback run after the CIB has been replaced.
 *
 * Repopulates and updates the CIB node entries; when this node is the DC
 * it also raises I_ELECTION.  event and msg are not used.
 */
static void
do_cib_replaced(const char *event, HA_Message *msg)
{
	crm_debug("Updating the CIB after a replace");

	populate_cib_nodes(fsa_cluster_conn, FALSE);
	do_update_cib_nodes(AM_I_DC, __FUNCTION__);

	if(!AM_I_DC) {
		return;
	}

	/* start the join process again so we get everyone's LRM status */
	register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
}
예제 #7
0
/*
 * A_ELECTION_CHECK: decide whether the election can be concluded.
 *
 * Compares the number of entries in the voted hash table with
 * crm_active_peers().  While in S_ELECTION and with at least as many votes
 * as active peers, the election timeout is stopped, I_ELECTION_DC is raised
 * and the voted table is destroyed.  If there are more votes than peers,
 * both sets are logged at error level first.
 *
 * None of the parameters are used.
 */
void
do_election_check(long long action,
                  enum crmd_fsa_cause cause,
                  enum crmd_fsa_state cur_state,
                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int voted_size = 0;
    int num_members = crm_active_peers();

    if (voted) {
        voted_size = g_hash_table_size(voted);
    }
    /* in the case of #voted > #members, it is better to
     *   wait for the timeout and give the cluster time to
     *   stabilize
     */
    if (fsa_state != S_ELECTION) {
        crm_debug("Ignore election check: we not in an election");

    } else if (voted_size >= num_members) {
        /* we won and everyone has voted */
        crm_timer_stop(election_timeout);
        register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
        if (voted_size > num_members) {
            /* voted_size > num_members >= 0 implies voted is not NULL here */
            GHashTableIter gIter;
            const crm_node_t *node = NULL;
            char *key = NULL;

            g_hash_table_iter_init(&gIter, crm_peer_cache);
            while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
                if (crm_is_peer_active(node)) {
                    crm_err("member: %s proc=%.32x", node->uname, node->processes);
                }
            }

            g_hash_table_iter_init(&gIter, voted);
            while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
                crm_err("voted: %s", key);
            }

        }
        crm_debug("Destroying voted hash");
        /* voted is still NULL when no vote was recorded and there are no
         * active peers; only destroy a table that exists */
        if (voted != NULL) {
            g_hash_table_destroy(voted);
            voted = NULL;
        }

    } else {
        crm_debug("Still waiting on %d non-votes (%d total)",
                  num_members - voted_size, num_members);
    }

    return;
}
예제 #8
0
/*
 * Callback run when the heartbeat connection goes away.
 *
 * With R_HA_DISCONNECTED set the disconnect is only logged; otherwise it is
 * reported as critical, I_ERROR is registered and the FSA is triggered.
 * user_data is not used.
 */
void
crmd_ha_connection_destroy(gpointer user_data)
{
    crm_trace("Invoked");

    if (is_set(fsa_input_register, R_HA_DISCONNECTED)) {
        /* we signed out, so this is expected */
        crm_info("Heartbeat disconnection complete");

    } else {
        crm_crit("Lost connection to heartbeat service!");
        register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL);
        trigger_fsa(fsa_source);
    }
}
예제 #9
0
파일: utils.c 프로젝트: brhellman/pacemaker
/*
 * Timer expiry callback; data is the fsa_timer_t that fired.
 *
 * Logs the expiry, stops non-repeating timers, converts the timer's
 * fsa_input into an FSA input where appropriate, and finally sets the
 * fsa_source trigger.  Always returns TRUE.
 */
gboolean
crm_timer_popped(gpointer data)
{
    fsa_timer_t *popped = (fsa_timer_t *) data;
    gboolean expected = FALSE;

    /* Expiry of these timers is logged at info level; anything else is an error */
    if (popped == wait_timer || popped == recheck_timer || popped == transition_timer
        || popped == finalization_timer || popped == election_trigger) {
        expected = TRUE;
    }

    if (expected) {
        crm_info("%s (%s) just popped (%dms)",
                 get_timer_desc(popped), fsa_input2string(popped->fsa_input),
                 popped->period_ms);

    } else {
        crm_err("%s (%s) just popped in state %s! (%dms)",
                get_timer_desc(popped), fsa_input2string(popped->fsa_input),
                fsa_state2string(fsa_state), popped->period_ms);
    }

    if (!popped->repeat) {
        /* make it _not_ go off again */
        crm_timer_stop(popped);
    }

    if (popped->fsa_input == I_INTEGRATED) {
        crm_info("Welcomed: %d, Integrated: %d",
                 g_hash_table_size(welcomed_nodes), g_hash_table_size(integrated_nodes));

        if (g_hash_table_size(welcomed_nodes) != 0) {
            register_fsa_input_before(C_TIMER_POPPED, popped->fsa_input, NULL);

        } else {
            /* If we don't even have ourself, start again */
            register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, __FUNCTION__);
        }

    } else if ((popped == recheck_timer && fsa_state != S_IDLE)
               || (popped == finalization_timer && fsa_state != S_FINALIZE_JOIN)) {
        /* The timer fired in a state other than the one checked above */
        crm_debug("Discarding %s event in state: %s",
                  fsa_input2string(popped->fsa_input), fsa_state2string(fsa_state));

    } else if (popped->fsa_input != I_NULL) {
        register_fsa_input(C_TIMER_POPPED, popped->fsa_input, NULL);
    }

    crm_trace("Triggering FSA: %s", __FUNCTION__);
    mainloop_set_trigger(fsa_source);

    return TRUE;
}
예제 #10
0
파일: main.c 프로젝트: Xarthisius/pacemaker
/*
 * Initialize the crmd FSA, run the main loop and report how it ended.
 *
 * Returns 0 after a normal main loop exit, 100 if R_STAYDOWN is set once
 * the main loop returns, or 1 if the FSA did not reach S_PENDING or
 * S_STARTING during startup.
 */
int
crmd_init(void)
{
    enum crmd_fsa_state state;
    int exit_code = 0;

    fsa_state = S_STARTING;
    fsa_input_register = 0;     /* zero out the register */

    init_dotfile();
    crm_debug("Starting %s", crm_system_name);
    register_fsa_input(C_STARTUP, I_STARTUP, NULL);

    crm_peer_init();
    state = s_crmd_fsa(C_STARTUP);

    if (state != S_PENDING && state != S_STARTING) {
        crm_err("Startup of %s failed.  Current state: %s",
                crm_system_name, fsa_state2string(state));
        exit_code = 1;

    } else {
        /* Create the mainloop and run it... */
        crmd_mainloop = g_main_new(FALSE);
        crm_trace("Starting %s's mainloop", crm_system_name);

#ifdef REALTIME_SUPPORT
        static int crm_realtime = 1;

        switch (crm_realtime) {
            case 1:
                cl_enable_realtime();
                break;
            case 0:
                cl_disable_realtime();
                break;
            default:
                break;
        }
        cl_make_realtime(SCHED_RR, 5, 64, 64);
#endif
        g_main_run(crmd_mainloop);

        if (is_set(fsa_input_register, R_STAYDOWN)) {
            crm_info("Inhibiting respawn by Heartbeat");
            exit_code = 100;
        }
    }

    crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
    qb_log_fini();

    return exit_code;
}
예제 #11
0
/*
 * A_ELECTION_CHECK: decide whether the election can be concluded.
 *
 * Compares the number of entries in the voted hash table with
 * crm_active_peers().  While in S_ELECTION and with at least as many votes
 * as active peers, the election timeout is stopped, I_ELECTION_DC is raised
 * and the voted table is destroyed.  If there are more votes than peers,
 * both sets are handed to the logging callbacks first.
 *
 * None of the parameters are used.
 */
void
do_election_check(long long action,
                  enum crmd_fsa_cause cause,
                  enum crmd_fsa_state cur_state,
                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int voted_size = 0;
    int num_members = crm_active_peers();

    if (voted) {
        voted_size = g_hash_table_size(voted);
    }
    /* in the case of #voted > #members, it is better to
     *   wait for the timeout and give the cluster time to
     *   stabilize
     */
    if (fsa_state != S_ELECTION) {
        crm_debug("Ignore election check: we not in an election");

    } else if (voted_size >= num_members) {
        /* we won and everyone has voted */
        crm_timer_stop(election_timeout);
        register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
        if (voted_size > num_members) {
            /* Labels passed to the logging callbacks as user data.  Local
             * arrays replace the former strdup()/free() pairs, whose result
             * was never checked and could therefore be NULL. */
            char member_tag[] = "member";
            char voted_tag[] = "voted";

            g_hash_table_foreach(crm_peer_cache, log_member_uname, member_tag);
            g_hash_table_foreach(voted, log_node, voted_tag);
        }
        crm_debug("Destroying voted hash");
        /* voted is still NULL when no vote was recorded and there are no
         * active peers; only destroy a table that exists */
        if (voted != NULL) {
            g_hash_table_destroy(voted);
            voted = NULL;
        }

    } else {
        crm_debug("Still waiting on %d non-votes (%d total)",
                  num_members - voted_size, num_members);
    }

    return;
}
예제 #12
0
/*
 * Callback run after the CIB has been replaced.
 *
 * Only the DC reacts, and not when it is in S_FINALIZE_JOIN with
 * R_CIB_ASKED set.  Otherwise the CIB node entries are refreshed and
 * I_ELECTION is raised.  event and msg are not used.
 */
static void
do_cib_replaced(const char *event, xmlNode *msg)
{
    crm_debug("Updating the CIB after a replace: DC=%s", AM_I_DC?"true":"false");

    if(AM_I_DC) {
        if(fsa_state != S_FINALIZE_JOIN
           || !is_set(fsa_input_register, R_CIB_ASKED)) {
            /* start the join process again so we get everyone's LRM status */
            populate_cib_nodes(FALSE);
            do_update_cib_nodes(TRUE, __FUNCTION__);
            register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
        }
        /* otherwise: no need to restart the join - we asked for this replace op */
    }
}
예제 #13
0
/*	A_DC_TIMER_STOP, A_DC_TIMER_START,
 *	A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START
 *	A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START
 *
 * Stops at most one timer and then starts at most one timer, each chosen
 * by the first matching bit in action.  Only action and cause are used.
 */
void
do_timer_control(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    gboolean stop_ok = TRUE;

    /* Stop phase (A_ELECTION_TIMEOUT_STOP / election_timeout is not handled) */
    if ((action & A_DC_TIMER_STOP) != 0) {
        stop_ok = crm_timer_stop(election_trigger);

    } else if ((action & A_FINALIZE_TIMER_STOP) != 0) {
        stop_ok = crm_timer_stop(finalization_timer);

    } else if ((action & A_INTEGRATE_TIMER_STOP) != 0) {
        stop_ok = crm_timer_stop(integration_timer);
    }

    /* Start phase (A_ELECTION_TIMEOUT_START / election_timeout is not handled).
     * dont start a timer that wasnt already running */
    if ((action & A_DC_TIMER_START) != 0 && stop_ok) {
        crm_timer_start(election_trigger);
        if (AM_I_DC) {
            /* there can be only one */
            register_fsa_input(cause, I_ELECTION, NULL);
        }

    } else if ((action & A_FINALIZE_TIMER_START) != 0) {
        crm_timer_start(finalization_timer);

    } else if ((action & A_INTEGRATE_TIMER_START) != 0) {
        crm_timer_start(integration_timer);
    }
}
예제 #14
0
/*
 * A_DC_RELEASE / A_DC_RELEASED
 *
 * A_DC_RELEASE clears R_THE_DC and empties stonith_cleanup_list, freeing
 * every entry.  A_DC_RELEASED (only considered when A_DC_RELEASE is not
 * set) registers I_RELEASE_SUCCESS.  Any other action is logged as an error.
 * cause, cur_state, current_input and msg_data are not used here.
 */
void
do_dc_release(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    if ((action & A_DC_RELEASE) != 0) {
        GListPtr entry = NULL;

        crm_debug("Releasing the role of DC");
        clear_bit(fsa_input_register, R_THE_DC);

        /* Free each target string, then the list cells themselves */
        entry = stonith_cleanup_list;
        while (entry != NULL) {
            char *target = entry->data;

            crm_debug("Purging %s from stonith cleanup list", target);
            free(target);
            entry = entry->next;
        }
        g_list_free(stonith_cleanup_list);
        stonith_cleanup_list = NULL;

    } else if ((action & A_DC_RELEASED) == 0) {
        crm_err("Unknown action %s", fsa_action2string(action));

    } else {
        crm_info("DC role released");
        /* TODO (from the original author, never implemented): if there are
         * errors we cannot stay up unhealthy, so raise I_SHUTDOWN here, or
         * perhaps I_ERROR and go to S_RECOVER. */
        register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL);
    }

    crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO);
}
예제 #15
0
/*
 * Route an incoming message: relay it if possible, otherwise handle it
 * locally and queue the resulting FSA input.
 *
 * cause: must be C_IPC_MESSAGE or C_HA_MESSAGE, otherwise nothing is done.
 * input: the message to route.
 *
 * Results listed in the switch below (other than I_NULL) are registered
 * with register_fsa_input(); every other result is deferred with
 * register_fsa_input_later().
 */
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
    /* Zero-initialized so that any member other than msg has a defined
     * value when the address of this struct is handed on below. */
    ha_msg_input_t fsa_input = { 0 };
    enum crmd_fsa_input result = I_NULL;

    fsa_input.msg = input;
    CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);

    /* try passing the buck first */
    if (relay_message(input, cause == C_IPC_MESSAGE)) {
        return;
    }

    /* handle locally */
    result = handle_message(input);

    /* done or process later? */
    switch (result) {
        case I_NULL:
        case I_CIB_OP:
        case I_ROUTER:
        case I_NODE_JOIN:
        case I_JOIN_REQUEST:
        case I_JOIN_RESULT:
            break;
        default:
            /* Deferring local processing of message */
            register_fsa_input_later(cause, result, &fsa_input);
            return;
    }

    if (result != I_NULL) {
        /* add to the front of the queue */
        register_fsa_input(cause, result, &fsa_input);
    }
}
예제 #16
0
/*
 * Initialize the crmd FSA, run the main loop and exit the process.
 *
 * The exit status passed to crmd_fast_exit() is CRM_EX_OK after a normal
 * main loop exit, CRM_EX_FATAL if R_STAYDOWN is set once the main loop
 * returns, or CRM_EX_ERROR if the FSA did not reach S_PENDING or
 * S_STARTING during startup.
 */
void
crmd_init(void)
{
    enum crmd_fsa_state state;
    crm_exit_t exit_code = CRM_EX_OK;

    log_deprecation_warnings();

    fsa_state = S_STARTING;
    fsa_input_register = 0;     /* zero out the register */

    init_dotfile();
    register_fsa_input(C_STARTUP, I_STARTUP, NULL);

    crm_peer_init();
    state = s_crmd_fsa(C_STARTUP);

    if (state != S_PENDING && state != S_STARTING) {
        crm_err("Startup of %s failed.  Current state: %s",
                crm_system_name, fsa_state2string(state));
        exit_code = CRM_EX_ERROR;

    } else {
        /* Run the mainloop until it is told to quit */
        crm_trace("Starting %s's mainloop", crm_system_name);
        g_main_loop_run(crmd_mainloop);

        if (is_set(fsa_input_register, R_STAYDOWN)) {
            crm_info("Inhibiting automated respawn");
            exit_code = CRM_EX_FATAL;
        }
    }

    crm_info("%s[%lu] exiting with status %d (%s)",
             crm_system_name, (unsigned long) getpid(), exit_code,
             crm_exit_str(exit_code));
    crmd_fast_exit(exit_code);
}
예제 #17
0
파일: te_utils.c 프로젝트: huiser/pacemaker
/*
 * Log why the current transition is being aborted and arrange for a new
 * calculation.
 *
 * abort_priority, abort_action, abort_text: passed to update_abort_priority()
 *     when the graph is not yet complete; abort_text is also logged.
 * reason: optional XML that caused the abort, used for logging only.
 * fn, line: call site, used for logging only.
 *
 * Nothing beyond logging happens in the FSA states listed in the switch
 * below.  Otherwise fsa_pe_ref is discarded and then either the transition
 * timer is started / I_PE_CALC is registered (graph already complete) or
 * the abort priority is updated and transition_trigger is set.
 */
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
                       const char *abort_text, xmlNode * reason, const char *fn, int line)
{
    int log_level = LOG_INFO;
    const char *magic = NULL;

    CRM_CHECK(transition_graph != NULL, return);

    if (reason == NULL) {
        do_crm_log(log_level,
                   "%s:%d - Triggered transition abort (complete=%d) : %s",
                   fn, line, transition_graph->complete, abort_text);

    } else {
        int add_updates = 0;
        int add_epoch = 0;
        int add_admin_epoch = 0;

        int del_updates = 0;
        int del_epoch = 0;
        int del_admin_epoch = 0;

        xmlNode *diff = get_xpath_object("//" F_CIB_UPDATE_RESULT "//diff", reason, LOG_DEBUG_2);
        const char *magic_text = NULL;

        magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
        magic_text = magic ? magic : "NA";

        if (diff == NULL) {
            do_crm_log(log_level,
                       "%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s) : %s",
                       fn, line, transition_graph->complete, TYPE(reason), ID(reason),
                       magic_text, abort_text);

        } else {
            /* Only the "added" version numbers end up in the log messages */
            cib_diff_version_details(diff,
                                     &add_admin_epoch, &add_epoch, &add_updates,
                                     &del_admin_epoch, &del_epoch, &del_updates);

            if (crm_str_eq(TYPE(reason), XML_CIB_TAG_NVPAIR, TRUE)) {
                do_crm_log(log_level,
                           "%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, name=%s, value=%s, magic=%s, cib=%d.%d.%d) : %s",
                           fn, line, transition_graph->complete, TYPE(reason), ID(reason),
                           NAME(reason), VALUE(reason), magic_text, add_admin_epoch,
                           add_epoch, add_updates, abort_text);

            } else {
                do_crm_log(log_level,
                           "%s:%d - Triggered transition abort (complete=%d, tag=%s, id=%s, magic=%s, cib=%d.%d.%d) : %s",
                           fn, line, transition_graph->complete, TYPE(reason), ID(reason),
                           magic_text, add_admin_epoch, add_epoch,
                           add_updates, abort_text);
            }
        }
    }

    switch (fsa_state) {
    case S_STARTING:
    case S_PENDING:
    case S_NOT_DC:
    case S_HALT:
    case S_ILLEGAL:
    case S_STOPPING:
    case S_TERMINATE:
        do_crm_log(log_level,
                   "Abort suppressed: state=%s (complete=%d)",
                   fsa_state2string(fsa_state), transition_graph->complete);
        return;
    default:
        break;
    }

    if (reason != NULL && magic == NULL) {
        crm_log_xml(log_level + 1, "Cause", reason);
    }

    /* Make sure any queued calculations are discarded ASAP */
    crm_free(fsa_pe_ref);
    fsa_pe_ref = NULL;

    if (!transition_graph->complete) {
        update_abort_priority(transition_graph, abort_priority, abort_action, abort_text);
        mainloop_set_trigger(transition_trigger);

    } else if (transition_timer->period_ms > 0) {
        crm_timer_start(transition_timer);

    } else {
        register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
    }
}
예제 #18
0
파일: cib.c 프로젝트: sipwise/heartbeat
/*	 A_CIB_INVOKE, A_CIB_BUMPGEN, A_UPDATE_NODESTATUS	*/
/*
 * Run the CIB operation described by the current FSA message.
 *
 * The operation name (F_CRM_TASK), section (F_CIB_SECTION) and call options
 * (F_CIB_CALLOPTS) are read from the message and passed to the CIB
 * connection's variant_op().  For A_CIB_INVOKE a reply is built and
 * relayed; for A_CIB_INVOKE_LOCAL a reply is only built on failure and is
 * then attached to an I_ERROR input.
 *
 * Only action, cur_state and msg_data are used (msg_data->origin directly,
 * and presumably through fsa_typed_data() as well).
 */
void
do_cib_invoke(long long action,
	      enum crmd_fsa_cause cause,
	      enum crmd_fsa_state cur_state,
	      enum crmd_fsa_input current_input,
	      fsa_data_t *msg_data)
{
	HA_Message *answer = NULL;
	/* NOTE(review): cib_msg is dereferenced on the next line without a NULL
	 * check - confirm fsa_typed_data() cannot return NULL here */
	ha_msg_input_t *cib_msg = fsa_typed_data(fsa_dt_ha_msg);
	const char *sys_from = cl_get_string(cib_msg->msg, F_CRM_SYS_FROM);

	/* Without a CIB connection nothing can be done; outside S_STOPPING this
	 * is logged as an error, otherwise only at info level */
	if(fsa_cib_conn->state == cib_disconnected) {
		if(cur_state != S_STOPPING) {
			crm_err("CIB is disconnected");
			crm_log_message_adv(LOG_WARNING, "CIB Input", cib_msg->msg);
			return;
		}
		crm_info("CIB is disconnected");
		crm_log_message_adv(LOG_DEBUG, "CIB Input", cib_msg->msg);
		return;
		
	}
	
	/* Requests that came from the crmd or the DC are treated as local calls */
	if(action & A_CIB_INVOKE) {
		if(safe_str_eq(sys_from, CRM_SYSTEM_CRMD)) {
			action = A_CIB_INVOKE_LOCAL;
		} else if(safe_str_eq(sys_from, CRM_SYSTEM_DC)) {
			action = A_CIB_INVOKE_LOCAL;
		}
	}
	

	if(action & A_CIB_INVOKE || action & A_CIB_INVOKE_LOCAL) {
		int call_options = 0;
		enum cib_errors rc  = cib_ok;
		crm_data_t *cib_frag  = NULL;
		
		const char *section  = NULL;
		const char *op   = cl_get_string(cib_msg->msg, F_CRM_TASK);

		section  = cl_get_string(cib_msg->msg, F_CIB_SECTION);
		
		ha_msg_value_int(cib_msg->msg, F_CIB_CALLOPTS, &call_options);

		crm_log_message(LOG_MSG, cib_msg->msg);
		crm_log_xml_debug_3(cib_msg->xml, "[CIB update]");
		/* A message without an operation name cannot be executed */
		if(op == NULL) {
			crm_err("Invalid CIB Message");
			register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
			return;
		}

		/* NOTE(review): cib_frag is never released in this function -
		 * confirm whether create_reply() takes ownership or this leaks */
		cib_frag = NULL;
		rc = fsa_cib_conn->cmds->variant_op(
			fsa_cib_conn, op, NULL, section,
			cib_msg->xml, &cib_frag, call_options);

		/* A reply is needed on failure and for every non-local request */
		if(rc < cib_ok || (action & A_CIB_INVOKE)) {
			answer = create_reply(cib_msg->msg, cib_frag);
			ha_msg_add(answer,XML_ATTR_RESULT,cib_error2string(rc));
		}
		
		if(action & A_CIB_INVOKE) {
			/* NOTE(review): answer is only deleted here when relaying
			 * fails - presumably relay_message() consumes it on success */
			if(relay_message(answer, TRUE) == FALSE) {
				crm_err("Confused what to do with cib result");
				crm_log_message(LOG_ERR, answer);
				crm_msg_del(answer);
				register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
				return;
			}

		} else if(rc < cib_ok) {
			/* Local call failed: raise I_ERROR with the reply attached */
			ha_msg_input_t *input = NULL;
			crm_err("Internal CRM/CIB command from %s() failed: %s",
				msg_data->origin, cib_error2string(rc));
			crm_log_message_adv(LOG_WARNING, "CIB Input", cib_msg->msg);
			crm_log_message_adv(LOG_WARNING, "CIB Reply", answer);
			
			input = new_ha_msg_input(answer);
			register_fsa_input(C_FSA_INTERNAL, I_ERROR, input);
			crm_msg_del(answer);
			delete_ha_msg_input(input);
		}

	} else {
		crm_err("Unexpected action %s in %s",
			fsa_action2string(action), __FUNCTION__);
	}
}
예제 #19
0
/*
 * Abort the current transition, log why, and arrange for a new calculation.
 *
 * abort_priority, abort_action, abort_text: passed to update_abort_priority()
 *     when the graph is not yet complete; abort_text is also logged.
 * reason: optional XML that caused the abort.  Its ancestors are searched
 *     for the enclosing XML_TAG_DIFF and XML_DIFF_CHANGE elements to build
 *     a more specific log message.
 * fn, line: call site, used for logging only.
 *
 * Nothing beyond logging happens in the FSA states listed in the switch
 * below.  Otherwise fsa_pe_ref is discarded.  If the graph is already
 * complete, the transition timer is restarted or I_PE_CALC is registered;
 * if not, transition_trigger is set.
 */
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
                       const char *abort_text, xmlNode * reason, const char *fn, int line)
{
    int add[] = { 0, 0, 0 };
    int del[] = { 0, 0, 0 };
    int level = LOG_INFO;
    xmlNode *diff = NULL;
    xmlNode *change = NULL;

    CRM_CHECK(transition_graph != NULL, return);

    switch (fsa_state) {
        case S_STARTING:
        case S_PENDING:
        case S_NOT_DC:
        case S_HALT:
        case S_ILLEGAL:
        case S_STOPPING:
        case S_TERMINATE:
            crm_info("Abort %s suppressed: state=%s (complete=%d)",
                     abort_text, fsa_state2string(fsa_state), transition_graph->complete);
            return;
        default:
            break;
    }

    /* Make sure any queued calculations are discarded ASAP */
    free(fsa_pe_ref);
    fsa_pe_ref = NULL;

    if (transition_graph->complete == FALSE) {
        /* Log at notice level when update_abort_priority() returns true */
        if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
            level = LOG_NOTICE;
        }
    }

    if(reason) {
        xmlNode *search = NULL;

        /* Walk up from reason to the enclosing diff element, if any */
        for(search = reason; search; search = search->parent) {
            if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
                diff = search;
                break;
            }
        }

        if(diff) {
            xml_patch_versions(diff, add, del);
            /* ...and to the enclosing change element, if any */
            for(search = reason; search; search = search->parent) {
                if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
                    change = search;
                    break;
                }
            }
        }
    }

    if(reason == NULL) {
        do_crm_log(level, "Transition aborted: %s (source=%s:%d, %d)",
                   abort_text, fn, line, transition_graph->complete);

    } else if(change == NULL) {
        char *local_path = xml_get_path(reason);

        do_crm_log(level, "Transition aborted by %s.%s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                   TYPE(reason), ID(reason), abort_text, add[0], add[1], add[2], fn, line, local_path, transition_graph->complete);
        free(local_path);

    } else {
        const char *kind = NULL;
        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *path = crm_element_value(change, XML_DIFF_PATH);

        /* Either attribute may be absent: never hand NULL to strcmp(), and
         * log a missing operation as "change" throughout */
        const char *op_text = op ? op : "change";
        const int is_create = (op != NULL) && (strcmp(op, "create") == 0);
        const int is_modify = (op != NULL) && (strcmp(op, "modify") == 0);
        const int is_delete = (op != NULL) && (strcmp(op, "delete") == 0);

        if(change == reason) {
            /* Describe the created/modified element rather than the change wrapper */
            if(is_create) {
                reason = reason->children;

            } else if(is_modify) {
                reason = first_named_child(reason, XML_DIFF_RESULT);
                if(reason) {
                    reason = reason->children;
                }
            }
        }

        kind = TYPE(reason);
        if(is_delete) {
            /* Last path component, when a path is available */
            const char *shortpath = path ? strrchr(path, '/') : NULL;

            do_crm_log(level, "Transition aborted by deletion of %s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                       shortpath?shortpath+1:path, abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);

        } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) {
            do_crm_log(level, "Transition aborted by %s, %s=%s: %s (%s cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                       crm_element_value(reason, XML_ATTR_ID),
                       crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
                       crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
                       abort_text, op_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);

        } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
            const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);

            do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (magic=%s, cib=%d.%d.%d, source=%s:%d, %d)",
                       crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op_text,
                       crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
                       magic, add[0], add[1], add[2], fn, line, transition_graph->complete);

        } else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
                   || safe_str_eq(XML_CIB_TAG_NODE, kind)) {
            const char *uname = crm_peer_uname(ID(reason));

            do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (cib=%d.%d.%d, source=%s:%d, %d)",
                       kind, op_text, uname ? uname : ID(reason), abort_text,
                       add[0], add[1], add[2], fn, line, transition_graph->complete);

        } else {
            do_crm_log(level, "Transition aborted by %s.%s '%s': %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
                       TYPE(reason), ID(reason), op_text, abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
        }
    }

    if (transition_graph->complete) {
        if (transition_timer->period_ms > 0) {
            crm_timer_stop(transition_timer);
            crm_timer_start(transition_timer);
        } else {
            register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
        }
        return;
    }

    mainloop_set_trigger(transition_trigger);
}
예제 #20
0
/*
 * crmd startup initialization.
 *
 * The following causes (C_ prefix) are available:
 *
 * enum crmd_fsa_cause
 * {
 *	C_UNKNOWN = 0,
 *	C_STARTUP,
 *	C_IPC_MESSAGE,
 *	C_HA_MESSAGE,
 *	C_CCM_CALLBACK,
 *	C_CRMD_STATUS_CALLBACK,
 *	C_LRM_OP_CALLBACK,
 *	C_LRM_MONITOR_CALLBACK,
 *	C_TIMER_POPPED,
 *	C_SHUTDOWN,
 *	C_HEARTBEAT_FAILED,
 *	C_SUBSYSTEM_CONNECT,
 *	C_HA_DISCONNECT,
 *	C_FSA_INTERNAL,
 *	C_ILLEGAL
 * };
 *
 * Returns 0 after a normal main loop exit, 100 if R_STAYDOWN is set once
 * the main loop returns, or 1 if the FSA did not reach S_PENDING or
 * S_STARTING during startup.
 */
int
crmd_init(void)
{
    enum crmd_fsa_state state;
    int exit_code = 0;

    /* The FSA starts out in S_STARTING */
    fsa_state = S_STARTING;
    fsa_input_register = 0; /* zero out the register */

    init_dotfile();
    crm_info("Starting %s", crm_system_name);

    /*
     * Create the input data for our own startup processing.
     * Per the original author's notes, the queued entry looks like:
     *
     *   fsa_data->id        = last_data_id;
     *   fsa_data->fsa_input = I_STARTUP;
     *   fsa_data->fsa_cause = C_STARTUP;
     *   fsa_data->origin    = raised_from;
     *   fsa_data->data      = NULL;
     *   fsa_data->data_type = fsa_dt_none;
     *   fsa_data->actions   = with_actions;
     *
     * and the macro expands as:
     *   #define register_fsa_input(cause, input, data) register_fsa_input_adv(cause, input, data, A_NOTHING, FALSE, __FUNCTION__)
     */
    register_fsa_input(C_STARTUP, I_STARTUP, NULL);

    crm_peer_init();

    /* Process the initial S_STARTING / C_STARTUP / I_STARTUP combination */
    state = s_crmd_fsa(C_STARTUP);

    if (state != S_PENDING && state != S_STARTING) {
        crm_err("Startup of %s failed.  Current state: %s",
                crm_system_name, fsa_state2string(state));
        exit_code = 1;

    } else {
        /* Create the mainloop and run it... */
        crmd_mainloop = g_main_new(FALSE);
        crm_info("Starting %s's mainloop", crm_system_name);

#ifdef REALTIME_SUPPORT
        static int crm_realtime = 1;

        switch (crm_realtime) {
            case 1:
                cl_enable_realtime();
                break;
            case 0:
                cl_disable_realtime();
                break;
            default:
                break;
        }
        cl_make_realtime(SCHED_RR, 5, 64, 64);
#endif
        /* Enter the main loop */
        g_main_run(crmd_mainloop);

        if (is_set(fsa_input_register, R_STAYDOWN)) {
            crm_info("Inhibiting respawn by Heartbeat");
            exit_code = 100;
        }
    }

    crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
    return exit_code;
}
/*
 * Process one election message (CRM_OP_VOTE or CRM_OP_NOVOTE) obtained via
 * fsa_typed_data(fsa_dt_ha_msg).
 *
 * The else-if chain below is evaluated strictly in order; the first matching
 * condition sets exactly one outcome:
 *   - done      : the message is only logged (and, for some branches,
 *                 recorded in the `voted` hash table);
 *   - we_loose  : we concede - CRM_OP_NOVOTE is sent back to the sender;
 *   - neither   : we pass the comparison and I_ELECTION is raised, unless a
 *                 loss was recorded less than loss_dampen seconds ago.
 */
void
do_election_count_vote(long long action,
		       enum crmd_fsa_cause cause,
		       enum crmd_fsa_state cur_state,
		       enum crmd_fsa_input current_input,
		       fsa_data_t *msg_data)
{
	int election_id = -1;
	int log_level = LOG_INFO;
	gboolean done = FALSE;
	gboolean we_loose = FALSE;
	const char *op             = NULL;	
	const char *vote_from      = NULL;
	const char *your_version   = NULL;
	const char *election_owner = NULL;
	const char *reason	   = "unknown";
	crm_node_t *our_node = NULL, *your_node = NULL;
	ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);

	/* last_election_win is only read inside the "#if 0" section below */
	static time_t last_election_win = 0;
	static time_t last_election_loss = 0;
	
	/* if the membership copy is NULL we REALLY shouldnt be voting
	 * the question is how we managed to get here.
	 */
	
	CRM_CHECK(msg_data != NULL, return);
	CRM_CHECK(crm_peer_cache != NULL, return);
	CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
	CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
	
	/* Extract the fields of the received message */
	op             = crm_element_value(vote->msg, F_CRM_TASK);
	vote_from      = crm_element_value(vote->msg, F_CRM_HOST_FROM);
	your_version   = crm_element_value(vote->msg, F_CRM_VERSION);
	election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
	crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);

	/* A message without a sender is treated as coming from ourselves */
	CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);
	
	/* Look up the node entry of the sender of the message */
	your_node = crm_get_peer(0, vote_from);
	/* Look up the node entry of the local node */
	our_node = crm_get_peer(0, fsa_our_uname);
	
 	if(voted == NULL) {
		crm_debug("Created voted hash");
		/* Create the voted hash table if it does not exist yet */
 		voted = g_hash_table_new_full(
			g_str_hash, g_str_equal,
			g_hash_destroy_str, g_hash_destroy_str);
 	}
	
	if(cur_state == S_STARTING) {
		/* Still in S_STARTING: we cannot become DC, so we concede (CRM_OP_NOVOTE is sent below) */
	    reason = "Still starting";
	    we_loose = TRUE;
	
	} else if(our_node == NULL || crm_is_member_active(our_node) == FALSE) {
		/* The local node is not yet known as a cluster member, or is not active: */
		/* we cannot become DC, so we concede (CRM_OP_NOVOTE is sent below) */
	    reason = "We are not part of the cluster";
	    log_level = LOG_ERR;
	    we_loose = TRUE;

	} else if(your_node == NULL || crm_is_member_active(your_node) == FALSE) {
	    /* The sender is not known as a cluster member, or is not active: */
	    /* only log the message */
	    reason = "Peer is not part of our cluster";
	    log_level = LOG_WARNING;
	    done = TRUE;

	} else if(election_id != current_election_id
	    && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) {
		/* We own this election but its id is not the current one: only log the message */
	    log_level = LOG_DEBUG_2;
	    reason = "Superceeded";
	    done = TRUE;

	} else if(crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
		/* A CRM_OP_NOVOTE message, i.e. the sender has conceded */
	    char *op_copy = crm_strdup(op);
	    char *uname_copy = crm_strdup(vote_from);
	    CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));
	    
	    /* update the list of nodes that have voted */
		/* Store sender -> op in the voted hash table (the table is created with destroy functions for both copies) */
	    g_hash_table_replace(voted, uname_copy, op_copy);
	    reason = "Recorded";
	    done = TRUE;

	} else if(crm_str_eq(vote_from, fsa_our_uname, TRUE)) {
		/* The message was sent by the local node itself */
	    char *op_copy = crm_strdup(op);
	    char *uname_copy = crm_strdup(vote_from);
	    CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

	    /* update ourselves in the list of nodes that have voted */
		/* Store our own entry in the voted hash table */
	    g_hash_table_replace(voted, uname_copy, op_copy);
	    reason = "Recorded";
	    done = TRUE;
	    
	} else if(compare_version(your_version, CRM_FEATURE_SET) < 0) {
		/* compare_version() reports the sender's version as lower than CRM_FEATURE_SET: */
	    /* the local node concedes */
	    reason = "Version";
	    we_loose = TRUE;
		
	} else if(compare_version(your_version, CRM_FEATURE_SET) > 0) {
		/* compare_version() reports the sender's version as higher than CRM_FEATURE_SET: neither flag is set, so the "pass" branch below runs */
	    reason = "Version";
	    
	} else if(your_node->born < our_node->born) {
		/* The sender's born value is smaller than ours */
	    reason = "Age";
	    /* the local node concedes */
	    we_loose = TRUE;
	    
	} else if(your_node->born > our_node->born) {
		/* The sender's born value is larger than ours: */
	    /* the local node stays a DC candidate */
	    reason = "Age";

	} else if(fsa_our_uname == NULL) {
		/* The local node name is not set: */
	    /* the local node concedes */
	    reason = "Unknown host name";
	    we_loose = TRUE;
	    
	} else if(strcasecmp(fsa_our_uname, vote_from) > 0) {
		/* Our node name compares greater than the sender's (case-insensitive): */
	    /* the local node concedes */
	    reason = "Host name";
	    we_loose = TRUE;
	    
	} else {
		/* Everything else: we pass the comparison */
	    reason = "Host name";
	    CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0);
/* cant happen...
 *	} else if(strcasecmp(fsa_our_uname, vote_from) == 0) {
 *
 * default...
 *	} else { // strcasecmp(fsa_our_uname, vote_from) < 0
 *		we win
 */
	}

	if(done) {
	    do_crm_log(log_level+1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
		       election_id, current_election_id, election_owner, op, vote_from, reason);
	    
	} else if(we_loose) {
		/* One of the checks above decided that we cannot become the DC */
		
		/* Build a CRM_OP_NOVOTE request addressed to the sender's CRMD */
		xmlNode *novote = create_request(
			CRM_OP_NOVOTE, NULL, vote_from,
			CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

		do_crm_log(log_level+1, "Election %d (owner: %s) lost: %s from %s (%s)",
			   election_id, election_owner, op, vote_from, reason);
		
		/* Clear the recorded DC */
		update_dc(NULL);
		
		/* Stop the election_timeout timer */
		crm_timer_stop(election_timeout);
		
		if(fsa_input_register & R_THE_DC) {
			crm_debug_3("Give up the DC to %s", vote_from);
			register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
			
		} else if(cur_state != S_STARTING) {
			crm_debug_3("We werent the DC anyway");
			register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
		}

		/* Copy the received election owner into the reply */
		crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
		/* Copy the received election id into the reply */
		crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);
		
		/* Send the CRM_OP_NOVOTE reply to the sender of the vote */
		send_cluster_message(vote_from, crm_msg_crmd, novote, TRUE);
		
		/* Release the reply message */
		free_xml(novote);

		/* Invoke set_slave on the CIB connection */
		fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);

		/* Remember when we lost, for the dampening check in the "pass" branch */
		last_election_loss = time(NULL);
		last_election_win = 0;

	} else {
	    do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
		     election_id, election_owner, op, vote_from, reason);

	    if(last_election_loss) {
			time_t tm_now = time(NULL);
			/* Ignore the message if we lost an election less than loss_dampen seconds ago */
			if(tm_now - last_election_loss < (time_t)loss_dampen) {
		    	crm_info("Election %d ignore: We already lost an election less than %ds ago",
			      election_id, loss_dampen);
		    	update_dc(NULL);
		    return;
			}
			last_election_loss = 0;
	    }

#if 0
	    /* Enabling this code can lead to multiple DCs during SimulStart.
	     * Specifically when a node comes up after our last 'win' vote.
	     *
	     * Fixing and enabling this functionality might become important when
	     * we start running realy big clusters, but for now leave it disabled.
	     */
	    if(last_election_win) {
		time_t tm_now = time(NULL);
		if(tm_now - last_election_win < (time_t)win_dampen) {
		    crm_info("Election %d ignore: We already won an election less than %ds ago",
			      election_id, win_dampen);
		    return;
		}
	    }

	    last_election_win = time(NULL);
#endif
		/* Raise I_ELECTION and discard the votes recorded so far */
	    register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
	    g_hash_table_destroy(voted);
	    voted = NULL;
	}	
}
/*
 * Report the completion of a transition graph to the crmd FSA.
 *
 * graph->completion_action selects the log label and the FSA input (if any)
 * to raise.  Afterwards the graph's abort_reason is cleared, its
 * completion_action is reset to tg_done and R_IN_TRANSITION is cleared from
 * fsa_input_register.  When no input is raised, fsa_source (if set) is
 * triggered instead.
 *
 * graph must not be NULL; it is dereferenced unconditionally.
 */
void
notify_crmd(crm_graph_t *graph)
{
	int log_level = LOG_DEBUG;
	const char *type = "unknown";
	enum crmd_fsa_input event = I_NULL;

	crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));

	/* A graph handed to us should already be complete; force it if not */
	CRM_CHECK(graph->complete, graph->complete = TRUE);

	switch(graph->completion_action) {
		case tg_stop:
		case tg_done:
		    /* Both actions are handled identically apart from the
		     * label.  Previously tg_stop fell through and its "stop"
		     * label was overwritten with "done" before being logged.
		     */
		    type = (graph->completion_action == tg_stop) ? "stop" : "done";
		    log_level = LOG_INFO;
		    if(fsa_state == S_TRANSITION_ENGINE) {
			event = I_TE_SUCCESS;
		    }
		    break;

		case tg_restart:
		    type = "restart";
		    if(fsa_state == S_TRANSITION_ENGINE) {
			if(transition_timer->period_ms > 0) {
			    /* Start the timer instead of raising I_PE_CALC now */
			    crm_timer_start(transition_timer);
			} else {
			    event = I_PE_CALC;
			}

		    } else if(fsa_state == S_POLICY_ENGINE) {
			/* Add the A_PE_INVOKE action to the FSA */
			register_fsa_action(A_PE_INVOKE);
		    }
		    break;

		case tg_shutdown:
		    type = "shutdown";
		    if(is_set(fsa_input_register, R_SHUTDOWN)) {
			event = I_STOP;

		    } else {
			event = I_TERMINATE;
		    }
		    break;
	}

	te_log_action(log_level, "Transition %d status: %s - %s",
		      graph->id, type, crm_str(graph->abort_reason));

	/* Reset the per-transition bookkeeping */
	graph->abort_reason = NULL;
	graph->completion_action = tg_done;
	clear_bit_inplace(fsa_input_register, R_IN_TRANSITION);

	if(event != I_NULL) {
	    register_fsa_input(C_FSA_INTERNAL, event, NULL);

	} else if(fsa_source) {
	    mainloop_set_trigger(fsa_source);
	}
}
예제 #23
0
/*	A_ELECTION_VOTE	*/
/*
 * Start a new election round by broadcasting a CRM_OP_VOTE request.
 *
 * No vote is sent while cur_state is S_STARTING, S_RECOVERY, S_STOPPING or
 * S_TERMINATE, or while R_STARTING is set in fsa_input_register; in that
 * case I_RELEASE_DC (when AM_I_DC) or I_PENDING is raised instead.
 *
 * Otherwise current_election_id is incremented, the vote (owner, id and our
 * uptime as reported by crm_uptime()) is sent, the `voted` table is
 * discarded, and election_timeout is started when cur_state is S_ELECTION or
 * S_RELEASE_DC.
 */
void
do_election_vote(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    struct timeval uptime;
    xmlNode *request = NULL;
    gboolean abstain = FALSE;

    /* don't vote if we're in one of these states or wanting to shut down */
    if (cur_state == S_STARTING || cur_state == S_RECOVERY
        || cur_state == S_STOPPING || cur_state == S_TERMINATE) {
        crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state));
        abstain = TRUE;

    } else if (is_set(fsa_input_register, R_STARTING)) {
        /* Silent abstention: no warning is logged for this case */
        abstain = TRUE;
    }

    if (abstain) {
        enum crmd_fsa_input fallback = I_PENDING;

        if (AM_I_DC) {
            fallback = I_RELEASE_DC;
        }
        register_fsa_input(C_FSA_INTERNAL, fallback, NULL);
        return;
    }

    request = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    /* Each round gets a fresh id, owned by this node */
    current_election_id++;
    crm_xml_add(request, F_CRM_ELECTION_OWNER, fsa_our_uuid);
    crm_xml_add_int(request, F_CRM_ELECTION_ID, current_election_id);

    /* Include our uptime in the vote */
    crm_uptime(&uptime);
    crm_xml_add_int(request, F_CRM_ELECTION_AGE_S, uptime.tv_sec);
    crm_xml_add_int(request, F_CRM_ELECTION_AGE_US, uptime.tv_usec);

    send_cluster_message(NULL, crm_msg_crmd, request, TRUE);
    free_xml(request);

    crm_debug("Started election %d", current_election_id);

    /* Votes recorded for any previous round no longer apply */
    if (voted != NULL) {
        g_hash_table_destroy(voted);
        voted = NULL;
    }

    switch (cur_state) {
        case S_ELECTION:
        case S_RELEASE_DC:
            crm_timer_start(election_timeout);
            break;

        case S_INTEGRATION:
            /* Voting here is accepted without starting the timer */
            break;

        default:
            crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
            break;
    }
}
예제 #24
0
/*	A_ELECTION_COUNT	*/
/*
 * Evaluate one election message (CRM_OP_VOTE or CRM_OP_NOVOTE) obtained via
 * fsa_typed_data(fsa_dt_ha_msg) and decide how this node reacts.
 *
 * The checks in the else-if chain are applied strictly in order; the first
 * match sets exactly one outcome:
 *   - done      : the message is only logged (and recorded in `voted` for
 *                 no-votes and for our own vote);
 *   - we_loose  : we concede - CRM_OP_NOVOTE is sent to the sender, the DC
 *                 is cleared and the time of the loss is remembered;
 *   - neither   : we pass the comparison and I_ELECTION is raised, unless we
 *                 lost an election less than loss_dampen seconds ago.
 */
void
do_election_count_vote(long long action,
                       enum crmd_fsa_cause cause,
                       enum crmd_fsa_state cur_state,
                       enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    struct timeval your_age;
    int your_age_s = 0;
    int your_age_us = 0;
    int age;
    int election_id = -1;
    int log_level = LOG_INFO;
    gboolean use_born_on = FALSE;
    gboolean done = FALSE;
    gboolean we_loose = FALSE;
    const char *op = NULL;
    const char *vote_from = NULL;
    const char *your_version = NULL;
    const char *election_owner = NULL;
    const char *reason = "unknown";
    crm_node_t *our_node = NULL, *your_node = NULL;
    ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);

    static time_t last_election_loss = 0;

    /* if the membership copy is NULL we REALLY shouldnt be voting
     * the question is how we managed to get here.
     */

    CRM_CHECK(msg_data != NULL, return);
    CRM_CHECK(crm_peer_cache != NULL, return);
    CRM_CHECK(vote != NULL, crm_err("Bogus data from %s", msg_data->origin); return);
    CRM_CHECK(vote->msg != NULL, crm_err("Bogus data from %s", msg_data->origin); return);

    op = crm_element_value(vote->msg, F_CRM_TASK);
    vote_from = crm_element_value(vote->msg, F_CRM_HOST_FROM);
    your_version = crm_element_value(vote->msg, F_CRM_VERSION);
    election_owner = crm_element_value(vote->msg, F_CRM_ELECTION_OWNER);
    crm_element_value_int(vote->msg, F_CRM_ELECTION_ID, &election_id);

    /* Read the sender's uptime into real ints and convert afterwards.
     * tv_sec/tv_usec need not have the size or representation of int, so
     * writing to them through an (int *) cast is not portable.  The
     * temporaries start at 0, so the fields stay 0 when the call stores
     * nothing (the original relied on the same zero-initialisation).
     */
    crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_S, &your_age_s);
    crm_element_value_int(vote->msg, F_CRM_ELECTION_AGE_US, &your_age_us);
    your_age.tv_sec = your_age_s;
    your_age.tv_usec = your_age_us;

    /* A message without a sender is treated as coming from ourselves */
    CRM_CHECK(vote_from != NULL, vote_from = fsa_our_uname);

    your_node = crm_get_peer(0, vote_from);
    our_node = crm_get_peer(0, fsa_our_uname);

    if (voted == NULL) {
        crm_debug("Created voted hash");
        voted = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                      g_hash_destroy_str, g_hash_destroy_str);
    }

    /* The "born" comparison is only used on these two cluster types */
    use_born_on = is_heartbeat_cluster() || is_classic_ais_cluster();

    age = crm_compare_age(your_age);

    if (cur_state == S_STARTING) {
        reason = "Still starting";
        we_loose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_loose = TRUE;

    } else if (election_id != current_election_id && crm_str_eq(fsa_our_uuid, election_owner, TRUE)) {
        /* A reply to one of our own, no longer current, election rounds */
        log_level = LOG_DEBUG_2;
        reason = "Superceeded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Possibly we cached the message in the FSA queue at a point that it wasn't */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
        /* The table is created with destroy functions for both copies */
        char *op_copy = strdup(op);
        char *uname_copy = strdup(vote_from);

        CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

        /* update the list of nodes that have voted */
        g_hash_table_replace(voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else if (crm_str_eq(vote_from, fsa_our_uname, TRUE)) {
        char *op_copy = strdup(op);
        char *uname_copy = strdup(vote_from);

        CRM_ASSERT(crm_str_eq(fsa_our_uuid, election_owner, TRUE));

        /* update ourselves in the list of nodes that have voted */
        g_hash_table_replace(voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
        reason = "Version";
        we_loose = TRUE;

    } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
        /* Neither flag is set: handled by the "pass" branch below */
        reason = "Version";

    } else if (age < 0) {
        reason = "Uptime";
        we_loose = TRUE;

    } else if (age > 0) {
        reason = "Uptime";

        /* TODO: Check for y(our) born < 0 */
    } else if (use_born_on && your_node->born < our_node->born) {
        reason = "Born";
        we_loose = TRUE;

    } else if (use_born_on && your_node->born > our_node->born) {
        reason = "Born";

    } else if (fsa_our_uname == NULL) {
        reason = "Unknown host name";
        we_loose = TRUE;

    } else if (strcasecmp(fsa_our_uname, vote_from) > 0) {
        reason = "Host name";
        we_loose = TRUE;

    } else {
        reason = "Host name";
        CRM_ASSERT(strcmp(fsa_our_uname, vote_from) != 0);
/* cant happen...
 *	} else if(strcasecmp(fsa_our_uname, vote_from) == 0) {
 *
 * default...
 *	} else { // strcasecmp(fsa_our_uname, vote_from) < 0
 *		we win
 */
    }

    if (done) {
        do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
                   election_id, current_election_id, election_owner, op, vote_from, reason);

    } else if (we_loose) {
        xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote_from,
                                         CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

        do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
                   election_id, election_owner, op, vote_from, reason);
        update_dc(NULL);

        crm_timer_stop(election_timeout);
        if (fsa_input_register & R_THE_DC) {
            crm_trace("Give up the DC to %s", vote_from);
            register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);

        } else if (cur_state != S_STARTING) {
            crm_trace("We werent the DC anyway");
            register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
        }

        /* Echo the election's owner and id back so the sender can match the reply */
        crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
        crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);

        send_cluster_message(crm_get_peer(0, vote_from), crm_msg_crmd, novote, TRUE);
        free_xml(novote);

        fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);

        /* Remembered for the dampening check in the "pass" branch */
        last_election_loss = time(NULL);

    } else {
        do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
                   election_id, election_owner, op, vote_from, reason);

        if (last_election_loss) {
            time_t tm_now = time(NULL);

            /* Do not contest again too soon after conceding */
            if (tm_now - last_election_loss < (time_t) loss_dampen) {
                crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
                         election_id, loss_dampen, ctime(&last_election_loss));
                update_dc(NULL);
                return;
            }
            last_election_loss = 0;
        }

        /* Raise I_ELECTION and discard the votes recorded so far */
        register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
        g_hash_table_destroy(voted);
        voted = NULL;
    }
}
예제 #25
0
/*
 * React to a status change reported for a peer.
 *
 * type  - which attribute of the peer changed
 * node  - the peer; nothing beyond setting R_PEER_DATA happens when its
 *         uname is NULL
 * data  - the previous value: compared against node->state as a string for
 *         state changes, read as a uint32_t process mask for process
 *         changes (may be NULL in the latter case)
 *
 * On the DC the function additionally matches the change against an
 * expected "down" action and writes the node's status to the CIB.
 */
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    uint32_t prev_procs = 0;
    uint32_t proc_delta = 0;
    bool is_up = FALSE;
    const char *proc_status = NULL;

    set_bit(fsa_input_register, R_PEER_DATA);
    if (node->uname == NULL) {
        return;
    }

    switch (type) {
        case crm_status_uname:
            /* If we've never seen the node, then it also wont be in the status section */
            crm_info("%s is now %s", node->uname, node->state);
            return;

        case crm_status_rstate:
        case crm_status_nstate:
            /* Remote state changes get an extra log line, then are treated like any other */
            if (type == crm_status_rstate) {
                crm_info("Remote node %s is now %s (was %s)", node->uname, node->state, (const char *)data);
            }
            crm_info("%s is now %s (was %s)", node->uname, node->state, (const char *)data);

            if (safe_str_eq(data, node->state)) {
                /* State did not change */
                return;
            }
            if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
                is_up = TRUE;
            }
            break;

        case crm_status_processes:
            if (data) {
                prev_procs = *(const uint32_t *)data;
                proc_delta = node->processes ^ prev_procs;
            }

            /* crmd_proc_update(node, proc_flags); */
            if (node->processes & proc_flags) {
                proc_status = ONLINESTATUS;
            } else {
                proc_status = OFFLINESTATUS;
            }
            crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
                     node->uname, peer2text(proc_flags), proc_status,
                     AM_I_DC ? "true" : crm_str(fsa_our_dc), proc_delta);

            if ((proc_delta & proc_flags) == 0) {
                /* Peer process did not change */
                crm_trace("No change %6x %6x %6x", prev_procs, node->processes, proc_flags);
                return;
            }
            if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
                crm_trace("Not connected");
                return;
            }
            if (fsa_state == S_STOPPING) {
                crm_trace("Stopping");
                return;
            }

            is_up = (node->processes & proc_flags) != 0;

            if (!is_up && safe_str_eq(node->uname, fsa_our_uname)) {
                /* Did we get evicted? */
                crm_notice("Our peer connection failed");
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);

            } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
                /* Did the DC leave us? */
                crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);

            } else if (AM_I_DC && !is_up) {
                crm_info("Peer %s left us", node->uname);
                /* crm_update_peer_join(__FUNCTION__, node, crm_join_none); */
            }
            break;
    }

    if (AM_I_DC) {
        xmlNode *node_state = NULL;
        int update_flags = node_update_peer;
        gboolean active = crm_is_peer_active(node);
        crm_action_t *down_op = match_down_event(0, node->uuid, NULL, is_up);

        crm_trace("Alive=%d, appear=%d, down=%p", active, is_up, down_op);

        if (active && type == crm_status_processes) {
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

        if (down_op != NULL) {
            const char *down_task = crm_element_value(down_op->xml, XML_LRM_ATTR_TASK);

            if (safe_str_eq(down_task, CRM_OP_FENCE)) {
                if (active) {
                    crm_info("Node return implies stonith of %s (action %d) completed", node->uname,
                             down_op->id);
                    erase_status_tag(node->uname, XML_CIB_TAG_LRM, cib_scope_local);
                    erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
                    /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */
                    down_op->sent_update = TRUE;        /* Prevent tengine_stonith_callback() from calling send_stonith_update() */

                } else {
                    /* Completion is reported via tengine_stonith_callback() */
                    crm_trace("Waiting for stonithd to report the fencing of %s is complete", node->uname);
                }

            } else if (!active) {
                crm_notice("%s of %s (op %d) is complete", down_task, node->uname, down_op->id);
                /* down->confirmed = TRUE; Only stonith-ng returning should imply completion */
                stop_te_timer(down_op->timer);

                update_flags |= node_update_join | node_update_expected;
                crmd_peer_down(node, FALSE);
                check_join_state(fsa_state, __FUNCTION__);

                update_graph(transition_graph, down_op);
                trigger_graph();

            } else {
                crm_trace("Other %p", down_op);
            }

        } else if (!is_up) {
            /* The peer went away without a matching down action */
            crm_notice("Stonith/shutdown of %s not matched", node->uname);

            crm_update_peer_join(__FUNCTION__, node, crm_join_none);
            check_join_state(fsa_state, __FUNCTION__);

            abort_transition(INFINITY, tg_restart, "Node failure", NULL);
            fail_incompletable_actions(transition_graph, node->uuid);

        } else {
            crm_trace("Other %p", down_op);
        }

        /* Write the node's status to the CIB */
        node_state = do_update_node_cib(node, update_flags, NULL, __FUNCTION__);
        fsa_cib_anon_update(XML_CIB_TAG_STATUS, node_state,
                            cib_scope_local | cib_quorum_override | cib_can_create);
        free_xml(node_state);
    }

    trigger_fsa(fsa_source);
}
예제 #26
0
/*
 * React to a status change reported for a cluster or remote node.
 *
 * type  - which attribute of the node changed
 * node  - the node; nothing further happens when its uname is NULL
 * data  - the previous value: compared against node->state as a string for
 *         state changes, read as a uint32_t process mask for process
 *         changes (may be NULL in the latter case)
 *
 * On the DC the change is additionally matched against an expected "down"
 * action and the node's state is written to the CIB.
 */
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    uint32_t old = 0;
    uint32_t changed = 0;
    bool appeared = FALSE;
    bool is_remote = is_set(node->flags, crm_remote_node);
    const char *status = NULL;

    /* Crmd waits to receive some information from the membership layer before
     * declaring itself operational. If this is being called for a cluster node,
     * indicate that we have it.
     */
    if (!is_remote) {
        set_bit(fsa_input_register, R_PEER_DATA);
    }

    if (node->uname == NULL) {
        return;
    }

    switch (type) {
        case crm_status_uname:
            /* If we've never seen the node, then it also won't be in the status section */
            crm_info("%s node %s is now %s",
                     (is_remote? "Remote" : "Cluster"),
                     node->uname, state_text(node->state));
            return;

        case crm_status_rstate:
        case crm_status_nstate:
            /* This callback should not be called unless the state actually
             * changed, but here's a failsafe just in case.
             */
            CRM_CHECK(safe_str_neq(data, node->state), return);

            crm_info("%s node %s is now %s (was %s)",
                     (is_remote? "Remote" : "Cluster"),
                     node->uname, state_text(node->state), state_text(data));

            /* Only a transition to the member state counts as the node appearing */
            if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
                appeared = TRUE;
                if (!is_remote) {
                    remove_stonith_cleanup(node->uname);
                }
            }

            crmd_alert_node_event(node);
            break;

        case crm_status_processes:
            /* Without a previous mask, `changed` stays 0 and we return below */
            if (data) {
                old = *(const uint32_t *)data;
                changed = node->processes ^ old;
            }

            status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS;
            crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
                     node->uname, peer2text(proc_flags), status,
                     AM_I_DC ? "true" : crm_str(fsa_our_dc), changed);

            if ((changed & proc_flags) == 0) {
                /* Peer process did not change */
                crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags);
                return;
            } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
                crm_trace("Not connected");
                return;
            } else if (fsa_state == S_STOPPING) {
                crm_trace("Stopping");
                return;
            }

            appeared = (node->processes & proc_flags) != 0;
            if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) {
                /* Did we get evicted? */
                crm_notice("Our peer connection failed");
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);

            } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
                /* Did the DC leave us? */
                crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);

                /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
                 * want to fence it. Newer DCs will send their shutdown request
                 * to all peers, who will update the DC's expected state to
                 * down, thus avoiding fencing. We can safely erase the DC's
                 * transient attributes when it leaves in that case. However,
                 * the only way to avoid fencing older DCs is to leave the
                 * transient attributes intact until it rejoins.
                 */
                if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
                    erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
                }

            } else if(AM_I_DC && appeared == FALSE) {
                crm_info("Peer %s left us", node->uname);
                erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
            }
            break;
    }

    /* Everything below is DC-only: reconcile the change with the transition
     * graph and record the node's state in the CIB.
     */
    if (AM_I_DC) {
        xmlNode *update = NULL;
        int flags = node_update_peer;
        /* For remote nodes, liveness is taken from `appeared` rather than crm_is_peer_active() */
        gboolean alive = is_remote? appeared : crm_is_peer_active(node);
        crm_action_t *down = match_down_event(node->uuid, appeared);

        crm_trace("Alive=%d, appeared=%d, down=%d",
                  alive, appeared, (down? down->id : -1));

        if (alive && type == crm_status_processes) {
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

        if (down) {
            const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);

            if (safe_str_eq(task, CRM_OP_FENCE)) {

                /* tengine_stonith_callback() confirms fence actions */
                crm_trace("Updating CIB %s stonithd reported fencing of %s complete",
                          (down->confirmed? "after" : "before"), node->uname);

            } else if ((alive == FALSE) && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
                crm_notice("%s of peer %s is complete "CRM_XS" op=%d",
                           task, node->uname, down->id);

                /* down->confirmed = TRUE; */
                stop_te_timer(down->timer);

                /* Join/expected state is only updated for cluster nodes */
                if (!is_remote) {
                    flags |= node_update_join | node_update_expected;
                    crmd_peer_down(node, FALSE);
                    check_join_state(fsa_state, __FUNCTION__);
                }

                update_graph(transition_graph, down);
                trigger_graph();

            } else {
                crm_trace("Node %s is %salive, was expected to %s (op %d)",
                          node->uname, (alive? "" : "not "), task, down->id);
            }

        } else if (appeared == FALSE) {
            /* The node went away without a matching down action */
            crm_notice("Stonith/shutdown of %s not matched", node->uname);

            if (!is_remote) {
                crm_update_peer_join(__FUNCTION__, node, crm_join_none);
                check_join_state(fsa_state, __FUNCTION__);
            }

            abort_transition(INFINITY, tg_restart, "Node failure", NULL);
            fail_incompletable_actions(transition_graph, node->uuid);

        } else {
            crm_trace("Node %s came up, was not expected to be down",
                      node->uname);
        }

        if (is_remote) {
            /* A pacemaker_remote node won't have its cluster status updated
             * in the CIB by membership-layer callbacks, so do it here.
             */
            flags |= node_update_cluster;

            /* Trigger resource placement on newly integrated nodes */
            if (appeared) {
                abort_transition(INFINITY, tg_restart,
                                 "pacemaker_remote node integrated", NULL);
            }
        }

        /* Update the CIB node state */
        update = create_node_state_update(node, flags, NULL, __FUNCTION__);
        fsa_cib_anon_update(XML_CIB_TAG_STATUS, update,
                            cib_scope_local | cib_quorum_override | cib_can_create);
        free_xml(update);
    }

    trigger_fsa(fsa_source);
}