void replication_options::initialize() { dsn_app_info app_info; bool r = dsn_get_current_app_info(&app_info); dassert(r, "get current app info failed"); app_name = app_info.name; app_dir = app_info.data_dir; // slog_dir: // - if config[slog_dir] is empty: "app_dir/slog" // - else: "config[slog_dir]/app_name/slog" slog_dir = dsn_config_get_value_string("replication", "slog_dir", "", "shared log directory"); if (slog_dir.empty()) { slog_dir = app_dir; } else { slog_dir = utils::filesystem::path_combine(slog_dir, app_name); } slog_dir = utils::filesystem::path_combine(slog_dir, "slog"); // data_dirs // - if config[data_dirs] is empty: "app_dir/reps" // - else: "config[data_dirs]/app_name/reps" std::string dirs_str = dsn_config_get_value_string("replication", "data_dirs", "", "replica directory list"); std::vector<std::string> dirs; ::dsn::utils::split_args(dirs_str.c_str(), dirs, ','); if (dirs.empty()) { dirs.push_back(app_dir); } else { for (auto& dir : dirs) { dir = utils::filesystem::path_combine(dir, app_name); } } for (auto& dir : dirs) { data_dirs.push_back(utils::filesystem::path_combine(dir, "reps")); } prepare_timeout_ms_for_secondaries = (int)dsn_config_get_value_uint64("replication", "prepare_timeout_ms_for_secondaries", prepare_timeout_ms_for_secondaries, "timeout (ms) for prepare message to secondaries in two phase commit" ); prepare_timeout_ms_for_potential_secondaries = (int)dsn_config_get_value_uint64("replication", "prepare_timeout_ms_for_potential_secondaries", prepare_timeout_ms_for_potential_secondaries, "timeout (ms) for prepare message to potential secondaries in two phase commit" ); batch_write_disabled = dsn_config_get_value_bool("replication", "batch_write_disabled", batch_write_disabled, "whether to disable auto-batch of replicated write requests" ); staleness_for_commit = (int)dsn_config_get_value_uint64("replication", "staleness_for_commit", staleness_for_commit, "how many concurrent two phase commit rounds are allowed" ); 
max_mutation_count_in_prepare_list = (int)dsn_config_get_value_uint64("replication", "max_mutation_count_in_prepare_list", max_mutation_count_in_prepare_list, "maximum number of mutations in prepare list" ); mutation_2pc_min_replica_count = (int)dsn_config_get_value_uint64("replication", "mutation_2pc_min_replica_count", mutation_2pc_min_replica_count, "minimum number of alive replicas under which write is allowed" ); group_check_disabled = dsn_config_get_value_bool("replication", "group_check_disabled", group_check_disabled, "whether group check is disabled" ); group_check_interval_ms = (int)dsn_config_get_value_uint64("replication", "group_check_interval_ms", group_check_interval_ms, "every what period (ms) we check the replica healthness" ); checkpoint_disabled = dsn_config_get_value_bool("replication", "checkpoint_disabled", checkpoint_disabled, "whether checkpoint is disabled" ); checkpoint_interval_seconds = (int)dsn_config_get_value_uint64("replication", "checkpoint_interval_seconds", checkpoint_interval_seconds, "every what period (seconds) we do checkpoints for replicated apps" ); checkpoint_min_decree_gap = (int64_t)dsn_config_get_value_uint64("replication", "checkpoint_min_decree_gap", checkpoint_min_decree_gap, "minimum decree gap that triggers checkpoint" ); checkpoint_max_interval_hours = (int)dsn_config_get_value_uint64("replication", "checkpoint_max_interval_hours", checkpoint_max_interval_hours, "maximum time interval (hours) where a new checkpoint must be created" ); gc_disabled = dsn_config_get_value_bool("replication", "gc_disabled", gc_disabled, "whether to disable garbage collection" ); gc_interval_ms = (int)dsn_config_get_value_uint64("replication", "gc_interval_ms", gc_interval_ms, "every what period (ms) we do garbage collection for dead replicas, on-disk state, log, etc." 
); gc_memory_replica_interval_ms = (int)dsn_config_get_value_uint64("replication", "gc_memory_replica_interval_ms", gc_memory_replica_interval_ms, "after closing a healthy replica (due to LB), the replica will remain in memory for this long (ms) for quick recover" ); gc_disk_error_replica_interval_seconds = (int)dsn_config_get_value_uint64("replication", "gc_disk_error_replica_interval_seconds", gc_disk_error_replica_interval_seconds, "error replica are deleted after they have been closed and lasted on disk this long (seconds)" ); fd_disabled = dsn_config_get_value_bool("replication", "fd_disabled", fd_disabled, "whether to disable failure detection" ); fd_check_interval_seconds = (int)dsn_config_get_value_uint64("replication", "fd_check_interval_seconds", fd_check_interval_seconds, "every this period(seconds) the FD will check healthness of remote peers" ); fd_beacon_interval_seconds = (int)dsn_config_get_value_uint64("replication", "fd_beacon_interval_seconds", fd_beacon_interval_seconds, "every this period(seconds) the FD sends beacon message to remote peers" ); fd_lease_seconds = (int)dsn_config_get_value_uint64("replication", "fd_lease_seconds", fd_lease_seconds, "lease (seconds) get from remote FD master" ); fd_grace_seconds = (int)dsn_config_get_value_uint64("replication", "fd_grace_seconds", fd_grace_seconds, "grace (seconds) assigned to remote FD slaves (grace > lease)" ); log_private_disabled = dsn_config_get_value_bool("replication", "log_private_disabled", log_private_disabled, "whether to disable logging committed mutations for each app, which is used for easier learning" ); log_private_file_size_mb = (int)dsn_config_get_value_uint64("replication", "log_private_file_size_mb", log_private_file_size_mb, "private log maximum segment file size (MB)" ); log_private_batch_buffer_kb = (int)dsn_config_get_value_uint64("replication", "log_private_batch_buffer_kb", log_private_batch_buffer_kb, "private log buffer size (KB) for batching incoming logs" ); 
log_private_force_flush = dsn_config_get_value_bool("replication", "log_private_force_flush", log_private_force_flush, "when write private log, whether to flush file after write done" ); log_shared_file_size_mb = (int)dsn_config_get_value_uint64("replication", "log_shared_file_size_mb", log_shared_file_size_mb, "shared log maximum segment file size (MB)" ); log_shared_batch_buffer_kb = (int)dsn_config_get_value_uint64("replication", "log_batch_buffer_KB_shared", log_shared_batch_buffer_kb, "shared log buffer size (KB) for batching incoming logs" ); log_shared_force_flush = dsn_config_get_value_bool("replication", "log_shared_force_flush", log_shared_force_flush, "when write shared log, whether to flush file after write done" ); config_sync_disabled = dsn_config_get_value_bool("replication", "config_sync_disabled", config_sync_disabled, "whether to disable replica configuration periodical sync with the meta server" ); config_sync_interval_ms = (int)dsn_config_get_value_uint64("replication", "config_sync_interval_ms", config_sync_interval_ms, "every this period(ms) the replica syncs replica configuration with the meta server" ); lb_interval_ms = (int)dsn_config_get_value_uint64("replication", "lb_interval_ms", lb_interval_ms, "every this period(ms) the meta server will do load balance" ); write_empty_enabled = dsn_config_get_value_bool("replication", "write_empty_enabled", write_empty_enabled, "whether to enable empty write when no write requests are processed for more than group_check_period, default is true" ); read_meta_servers(); sanity_check(); }
// Initializes the zookeeper-backed distributed lock service.
//
// args[0] is the lock root path. The function attaches to the zookeeper
// session of the current app, waits (bounded by the session manager's
// timeout) until the session is connected, and then creates every node along
// the lock root path, one '/'-separated component at a time.
//
// Returns:
//   ERR_INVALID_PARAMETERS - args is empty
//   ERR_CORRUPTION         - the current app info could not be obtained
//   ERR_TIMEOUT            - the session was still not connected after waiting
//   from_zerror(zerr)      - creating a path node failed with something other
//                            than ZNODEEXISTS
//   ERR_OK                 - success; a reference on this object has been taken
error_code distributed_lock_service_zookeeper::initialize(const std::vector<std::string>& args)
{
    if (args.empty())
    {
        derror("need parameters: <lock_root>");
        return ERR_INVALID_PARAMETERS;
    }

    dsn_app_info app_info;
    if (!dsn_get_current_app_info(&app_info))
    {
        derror("get current app info failed, can not init distributed_lock_service_zookeeper");
        return ERR_CORRUPTION;
    }

    _session = zookeeper_session_mgr::instance().get_session(&app_info);
    _zoo_state = _session->attach(
        this,
        std::bind(&distributed_lock_service_zookeeper::on_zoo_session_evt,
                  lock_srv_ptr(this),
                  std::placeholders::_1));

    if (_zoo_state != ZOO_CONNECTED_STATE)
    {
        // Not connected yet: block on _waiting_attach for at most the
        // configured timeout, then look at the state once more.
        _waiting_attach.wait_for(zookeeper_session_mgr::fast_instance().timeout());
        if (_zoo_state != ZOO_CONNECTED_STATE)
        {
            dwarn("attach to zookeeper session timeout, distributed lock service initialized failed");
            return ERR_TIMEOUT;
        }
    }

    std::vector<std::string> path_parts;
    utils::split_args(args[0].c_str(), path_parts, '/');

    // Create "/a", then "/a/b", ... synchronously; a node that already exists
    // is not treated as an error.
    std::string node_path;
    for (auto& part : path_parts)
    {
        utils::notify_event created;
        int zerr;

        node_path += "/";
        node_path += part;

        zookeeper_session::zoo_opcontext* create_op = zookeeper_session::create_context();
        create_op->_optype = zookeeper_session::ZOO_CREATE;
        create_op->_input._path = node_path;
        create_op->_callback_function =
            [&created, &zerr](zookeeper_session::zoo_opcontext* finished)
            {
                zerr = finished->_output.error;
                created.notify();
            };

        _session->visit(create_op);
        created.wait();

        if (zerr != ZOK && zerr != ZNODEEXISTS)
        {
            derror("create zk node failed, path = %s, err = %s", node_path.c_str(), zerror(zerr));
            return from_zerror(zerr);
        }
    }

    // No path components at all means the lock root is the zookeeper root.
    if (node_path.empty())
    {
        _lock_root = "/";
    }
    else
    {
        _lock_root = node_path;
    }
    ddebug("init distributed_lock_service_zookeeper succeed, lock_root = %s", _lock_root.c_str());

    // Notice: the reference taken here is released in finalize.
    add_ref();
    return ERR_OK;
}