void replication_options::initialize() { prepare_timeout_ms_for_secondaries = (int)dsn_config_get_value_uint64("replication", "prepare_timeout_ms_for_secondaries", prepare_timeout_ms_for_secondaries, "timeout (ms) for prepare message to secondaries in two phase commit" ); prepare_timeout_ms_for_potential_secondaries = (int)dsn_config_get_value_uint64("replication", "prepare_timeout_ms_for_potential_secondaries", prepare_timeout_ms_for_potential_secondaries, "timeout (ms) for prepare message to potential secondaries in two phase commit" ); batch_write_disabled = dsn_config_get_value_bool("replication", "batch_write_disabled", batch_write_disabled, "whether to disable auto-batch of replicated write requests" ); staleness_for_commit = (int)dsn_config_get_value_uint64("replication", "staleness_for_commit", staleness_for_commit, "how many concurrent two phase commit rounds are allowed" ); max_mutation_count_in_prepare_list = (int)dsn_config_get_value_uint64("replication", "max_mutation_count_in_prepare_list", max_mutation_count_in_prepare_list, "maximum number of mutations in prepare list" ); mutation_2pc_min_replica_count = (int)dsn_config_get_value_uint64("replication", "mutation_2pc_min_replica_count", mutation_2pc_min_replica_count, "minimum number of alive replicas under which write is allowed" ); group_check_disabled = dsn_config_get_value_bool("replication", "group_check_disabled", group_check_disabled, "whether group check is disabled" ); group_check_interval_ms = (int)dsn_config_get_value_uint64("replication", "group_check_interval_ms", group_check_interval_ms, "every what period (ms) we check the replica healthness" ); checkpoint_disabled = dsn_config_get_value_bool("replication", "checkpoint_disabled", checkpoint_disabled, "whether checkpoint is disabled" ); checkpoint_interval_seconds = (int)dsn_config_get_value_uint64("replication", "checkpoint_interval_seconds", checkpoint_interval_seconds, "every what period (seconds) we do checkpoints for replicated 
apps" ); checkpoint_min_decree_gap = (int64_t)dsn_config_get_value_uint64("replication", "checkpoint_min_decree_gap", checkpoint_min_decree_gap, "minimum decree gap that triggers checkpoint" ); checkpoint_max_interval_hours = (int)dsn_config_get_value_uint64("replication", "checkpoint_max_interval_hours", checkpoint_max_interval_hours, "maximum time interval (hours) where a new checkpoint must be created" ); gc_disabled = dsn_config_get_value_bool("replication", "gc_disabled", gc_disabled, "whether to disable garbage collection" ); gc_interval_ms = (int)dsn_config_get_value_uint64("replication", "gc_interval_ms", gc_interval_ms, "every what period (ms) we do garbage collection for dead replicas, on-disk state, log, etc." ); gc_memory_replica_interval_ms = (int)dsn_config_get_value_uint64("replication", "gc_memory_replica_interval_ms", gc_memory_replica_interval_ms, "after closing a healthy replica (due to LB), the replica will remain in memory for this long (ms) for quick recover" ); gc_disk_error_replica_interval_seconds = (int)dsn_config_get_value_uint64("replication", "gc_disk_error_replica_interval_seconds", gc_disk_error_replica_interval_seconds, "error replica are deleted after they have been closed and lasted on disk this long (seconds)" ); fd_disabled = dsn_config_get_value_bool("replication", "fd_disabled", fd_disabled, "whether to disable failure detection" ); fd_check_interval_seconds = (int)dsn_config_get_value_uint64("replication", "fd_check_interval_seconds", fd_check_interval_seconds, "every this period(seconds) the FD will check healthness of remote peers" ); fd_beacon_interval_seconds = (int)dsn_config_get_value_uint64("replication", "fd_beacon_interval_seconds", fd_beacon_interval_seconds, "every this period(seconds) the FD sends beacon message to remote peers" ); fd_lease_seconds = (int)dsn_config_get_value_uint64("replication", "fd_lease_seconds", fd_lease_seconds, "lease (seconds) get from remote FD master" ); fd_grace_seconds = 
(int)dsn_config_get_value_uint64("replication", "fd_grace_seconds", fd_grace_seconds, "grace (seconds) assigned to remote FD slaves (grace > lease)" ); log_private_disabled = dsn_config_get_value_bool("replication", "log_private_disabled", log_private_disabled, "whether to disable logging committed mutations for each app, which is used for easier learning" ); log_private_file_size_mb = (int)dsn_config_get_value_uint64("replication", "log_private_file_size_mb", log_private_file_size_mb, "private log maximum segment file size (MB)" ); log_private_batch_buffer_kb = (int)dsn_config_get_value_uint64("replication", "log_private_batch_buffer_kb", log_private_batch_buffer_kb, "private log buffer size (KB) for batching incoming logs" ); log_private_force_flush = dsn_config_get_value_bool("replication", "log_private_force_flush", log_private_force_flush, "when write private log, whether to flush file after write done" ); log_shared_file_size_mb = (int)dsn_config_get_value_uint64("replication", "log_shared_file_size_mb", log_shared_file_size_mb, "shared log maximum segment file size (MB)" ); log_shared_batch_buffer_kb = (int)dsn_config_get_value_uint64("replication", "log_batch_buffer_KB_shared", log_shared_batch_buffer_kb, "shared log buffer size (KB) for batching incoming logs" ); log_shared_force_flush = dsn_config_get_value_bool("replication", "log_shared_force_flush", log_shared_force_flush, "when write shared log, whether to flush file after write done" ); config_sync_disabled = dsn_config_get_value_bool("replication", "config_sync_disabled", config_sync_disabled, "whether to disable replica configuration periodical sync with the meta server" ); config_sync_interval_ms = (int)dsn_config_get_value_uint64("replication", "config_sync_interval_ms", config_sync_interval_ms, "every this period(ms) the replica syncs replica configuration with the meta server" ); lb_interval_ms = (int)dsn_config_get_value_uint64("replication", "lb_interval_ms", lb_interval_ms, "every this 
period(ms) the meta server will do load balance" ); read_meta_servers(); sanity_check(); }
void replication_options::initialize() { dsn_app_info app_info; bool r = dsn_get_current_app_info(&app_info); dassert(r, "get current app info failed"); app_name = app_info.name; app_dir = app_info.data_dir; // slog_dir: // - if config[slog_dir] is empty: "app_dir/slog" // - else: "config[slog_dir]/app_name/slog" slog_dir = dsn_config_get_value_string("replication", "slog_dir", "", "shared log directory"); if (slog_dir.empty()) { slog_dir = app_dir; } else { slog_dir = utils::filesystem::path_combine(slog_dir, app_name); } slog_dir = utils::filesystem::path_combine(slog_dir, "slog"); // data_dirs // - if config[data_dirs] is empty: "app_dir/reps" // - else: "config[data_dirs]/app_name/reps" std::string dirs_str = dsn_config_get_value_string("replication", "data_dirs", "", "replica directory list"); std::vector<std::string> dirs; ::dsn::utils::split_args(dirs_str.c_str(), dirs, ','); if (dirs.empty()) { dirs.push_back(app_dir); } else { for (auto& dir : dirs) { dir = utils::filesystem::path_combine(dir, app_name); } } for (auto& dir : dirs) { data_dirs.push_back(utils::filesystem::path_combine(dir, "reps")); } prepare_timeout_ms_for_secondaries = (int)dsn_config_get_value_uint64("replication", "prepare_timeout_ms_for_secondaries", prepare_timeout_ms_for_secondaries, "timeout (ms) for prepare message to secondaries in two phase commit" ); prepare_timeout_ms_for_potential_secondaries = (int)dsn_config_get_value_uint64("replication", "prepare_timeout_ms_for_potential_secondaries", prepare_timeout_ms_for_potential_secondaries, "timeout (ms) for prepare message to potential secondaries in two phase commit" ); batch_write_disabled = dsn_config_get_value_bool("replication", "batch_write_disabled", batch_write_disabled, "whether to disable auto-batch of replicated write requests" ); staleness_for_commit = (int)dsn_config_get_value_uint64("replication", "staleness_for_commit", staleness_for_commit, "how many concurrent two phase commit rounds are allowed" ); 
max_mutation_count_in_prepare_list = (int)dsn_config_get_value_uint64("replication", "max_mutation_count_in_prepare_list", max_mutation_count_in_prepare_list, "maximum number of mutations in prepare list" ); mutation_2pc_min_replica_count = (int)dsn_config_get_value_uint64("replication", "mutation_2pc_min_replica_count", mutation_2pc_min_replica_count, "minimum number of alive replicas under which write is allowed" ); group_check_disabled = dsn_config_get_value_bool("replication", "group_check_disabled", group_check_disabled, "whether group check is disabled" ); group_check_interval_ms = (int)dsn_config_get_value_uint64("replication", "group_check_interval_ms", group_check_interval_ms, "every what period (ms) we check the replica healthness" ); checkpoint_disabled = dsn_config_get_value_bool("replication", "checkpoint_disabled", checkpoint_disabled, "whether checkpoint is disabled" ); checkpoint_interval_seconds = (int)dsn_config_get_value_uint64("replication", "checkpoint_interval_seconds", checkpoint_interval_seconds, "every what period (seconds) we do checkpoints for replicated apps" ); checkpoint_min_decree_gap = (int64_t)dsn_config_get_value_uint64("replication", "checkpoint_min_decree_gap", checkpoint_min_decree_gap, "minimum decree gap that triggers checkpoint" ); checkpoint_max_interval_hours = (int)dsn_config_get_value_uint64("replication", "checkpoint_max_interval_hours", checkpoint_max_interval_hours, "maximum time interval (hours) where a new checkpoint must be created" ); gc_disabled = dsn_config_get_value_bool("replication", "gc_disabled", gc_disabled, "whether to disable garbage collection" ); gc_interval_ms = (int)dsn_config_get_value_uint64("replication", "gc_interval_ms", gc_interval_ms, "every what period (ms) we do garbage collection for dead replicas, on-disk state, log, etc." 
); gc_memory_replica_interval_ms = (int)dsn_config_get_value_uint64("replication", "gc_memory_replica_interval_ms", gc_memory_replica_interval_ms, "after closing a healthy replica (due to LB), the replica will remain in memory for this long (ms) for quick recover" ); gc_disk_error_replica_interval_seconds = (int)dsn_config_get_value_uint64("replication", "gc_disk_error_replica_interval_seconds", gc_disk_error_replica_interval_seconds, "error replica are deleted after they have been closed and lasted on disk this long (seconds)" ); fd_disabled = dsn_config_get_value_bool("replication", "fd_disabled", fd_disabled, "whether to disable failure detection" ); fd_check_interval_seconds = (int)dsn_config_get_value_uint64("replication", "fd_check_interval_seconds", fd_check_interval_seconds, "every this period(seconds) the FD will check healthness of remote peers" ); fd_beacon_interval_seconds = (int)dsn_config_get_value_uint64("replication", "fd_beacon_interval_seconds", fd_beacon_interval_seconds, "every this period(seconds) the FD sends beacon message to remote peers" ); fd_lease_seconds = (int)dsn_config_get_value_uint64("replication", "fd_lease_seconds", fd_lease_seconds, "lease (seconds) get from remote FD master" ); fd_grace_seconds = (int)dsn_config_get_value_uint64("replication", "fd_grace_seconds", fd_grace_seconds, "grace (seconds) assigned to remote FD slaves (grace > lease)" ); log_private_disabled = dsn_config_get_value_bool("replication", "log_private_disabled", log_private_disabled, "whether to disable logging committed mutations for each app, which is used for easier learning" ); log_private_file_size_mb = (int)dsn_config_get_value_uint64("replication", "log_private_file_size_mb", log_private_file_size_mb, "private log maximum segment file size (MB)" ); log_private_batch_buffer_kb = (int)dsn_config_get_value_uint64("replication", "log_private_batch_buffer_kb", log_private_batch_buffer_kb, "private log buffer size (KB) for batching incoming logs" ); 
log_private_force_flush = dsn_config_get_value_bool("replication", "log_private_force_flush", log_private_force_flush, "when write private log, whether to flush file after write done" ); log_shared_file_size_mb = (int)dsn_config_get_value_uint64("replication", "log_shared_file_size_mb", log_shared_file_size_mb, "shared log maximum segment file size (MB)" ); log_shared_batch_buffer_kb = (int)dsn_config_get_value_uint64("replication", "log_batch_buffer_KB_shared", log_shared_batch_buffer_kb, "shared log buffer size (KB) for batching incoming logs" ); log_shared_force_flush = dsn_config_get_value_bool("replication", "log_shared_force_flush", log_shared_force_flush, "when write shared log, whether to flush file after write done" ); config_sync_disabled = dsn_config_get_value_bool("replication", "config_sync_disabled", config_sync_disabled, "whether to disable replica configuration periodical sync with the meta server" ); config_sync_interval_ms = (int)dsn_config_get_value_uint64("replication", "config_sync_interval_ms", config_sync_interval_ms, "every this period(ms) the replica syncs replica configuration with the meta server" ); lb_interval_ms = (int)dsn_config_get_value_uint64("replication", "lb_interval_ms", lb_interval_ms, "every this period(ms) the meta server will do load balance" ); write_empty_enabled = dsn_config_get_value_bool("replication", "write_empty_enabled", write_empty_enabled, "whether to enable empty write when no write requests are processed for more than group_check_period, default is true" ); read_meta_servers(); sanity_check(); }
// Constructs a screen logger on top of the logging_provider base (log_dir and
// inner are forwarded to it unchanged) and loads the header style from the
// "tools.screen_logger" config section.
screen_logger::screen_logger(const char* log_dir, logging_provider* inner)
    : logging_provider(log_dir, inner)
{
    // The default-value argument is true, i.e. the short header form.
    const bool use_short_header = dsn_config_get_value_bool(
        "tools.screen_logger",
        "short_header",
        true,
        "whether to use short header (excluding file/function etc.)");
    _short_header = use_short_header;
}
// Constructs a file-backed logger rooted at log_dir.
//
// After resetting the bookkeeping fields and reading the
// "tools.simple_logger" config section, the constructor scans log_dir for
// files whose name parses as "log.<N>.txt" with N > 0, so that:
//   - _start_index ends up as the smallest existing N (or _index when no
//     such file exists), and
//   - _index ends up one past the largest existing N (or stays at 1 when no
//     such file exists).
// Finally create_log_file() is called.
simple_logger::simple_logger(const char* log_dir, logging_provider* inner)
    : logging_provider(log_dir, inner)
{
    _log_dir = std::string(log_dir);

    // All valid file indices are assumed to be positive, so 0 in _start_index
    // doubles as "nothing found yet".
    _start_index = 0;
    _index = 1;
    _lines = 0;
    _log = nullptr;

    _short_header = dsn_config_get_value_bool(
        "tools.simple_logger",
        "short_header",
        true,
        "whether to use short header (excluding file/function etc.)");
    _fast_flush = dsn_config_get_value_bool(
        "tools.simple_logger",
        "fast_flush",
        false,
        "whether to flush immediately");

    const auto stderr_level_name = dsn_config_get_value_string(
        "tools.simple_logger",
        "stderr_start_level",
        enum_to_string(LOG_LEVEL_WARNING),
        "copy log messages at or above this level to stderr in addition to logfiles");
    _stderr_start_level = enum_from_string(stderr_level_name, LOG_LEVEL_INVALID);
    dassert(_stderr_start_level != LOG_LEVEL_INVALID,
            "invalid [tools.simple_logger] stderr_start_level specified");

    _max_number_of_log_files_on_disk = dsn_config_get_value_uint64(
        "tools.simple_logger",
        "max_number_of_log_files_on_disk",
        20,
        "max number of log files reserved on disk, older logs are auto deleted");

    // Discover log files that already exist in the directory.
    std::vector<std::string> existing_files;
    const bool listed = dsn::utils::filesystem::get_subfiles(_log_dir, existing_files, false);
    if (!listed)
    {
        dassert(false, "Fail to get subfiles in %s.", _log_dir.c_str());
    }

    for (auto& file_path : existing_files)
    {
        const auto file_name = dsn::utils::filesystem::get_file_name(file_path);

        // Cheap pre-filter: longer than 8 characters and starting with "log.".
        const bool has_log_prefix =
            file_name.length() > 8 && file_name.substr(0, 4) == "log.";
        if (!has_log_prefix)
            continue;

        // Only the numeric field is validated; sscanf reports 1 as soon as
        // "%d" has been converted.
        int file_index = 0;
        const bool has_valid_index =
            sscanf(file_name.c_str(), "log.%d.txt", &file_index) == 1 && file_index > 0;
        if (!has_valid_index)
            continue;

        // Track the largest index seen ...
        if (_index < file_index)
            _index = file_index;
        // ... and the smallest one.
        if (_start_index == 0 || file_index < _start_index)
            _start_index = file_index;
    }
    existing_files.clear();

    const bool found_existing_logs = (_start_index != 0);
    if (found_existing_logs)
    {
        // Continue numbering right after the newest existing file.
        ++_index;
    }
    else
    {
        _start_index = _index;
    }

    create_log_file();
}
// Hooks the tracer callbacks onto task specs and registers the "tracer.find"
// command.
//
// For every task code in [0, dsn::task_code::max()] except TASK_CODE_INVALID,
// the config section "task.<code name>" is consulted:
//   - "is_trace" (default-value argument: the "is_trace" value read from
//     section "task..default") decides whether the task kind is traced at all;
//   - each "tracer::on_*" key (default-value argument: true) decides whether
//     the matching tracer callback is appended to that join point.
//
// The service_spec parameter is not referenced here; inside the loop the name
// `spec` refers to the per-task task_spec pointer, which shadows it.
void tracer::install(service_spec &spec)
{
    const bool trace_by_default = dsn_config_get_value_bool(
        "task..default", "is_trace", false, "whether to trace tasks by default");

    for (int code = 0; code <= dsn::task_code::max(); ++code)
    {
        if (code == TASK_CODE_INVALID)
            continue;

        std::string section_name("task.");
        section_name += dsn::task_code(code).to_string();

        task_spec *spec = task_spec::get(code);
        dassert(spec != nullptr, "task_spec cannot be null");

        const bool traced = dsn_config_get_value_bool(
            section_name.c_str(), "is_trace", trace_by_default, "whether to trace this kind of task");
        if (!traced)
            continue;

        // Per-hook switch inside this task's section; the default-value argument is true.
        auto hook_enabled = [&section_name](const char* key, const char* description) {
            return dsn_config_get_value_bool(section_name.c_str(), key, true, description);
        };

        // ---- generic task life cycle ----
        if (hook_enabled("tracer::on_task_create", "whether to trace when a task is created"))
            spec->on_task_create.put_back(tracer_on_task_create, "tracer");

        if (hook_enabled("tracer::on_task_enqueue", "whether to trace when a timer or async task is enqueued"))
            spec->on_task_enqueue.put_back(tracer_on_task_enqueue, "tracer");

        if (hook_enabled("tracer::on_task_begin", "whether to trace when a task begins"))
            spec->on_task_begin.put_back(tracer_on_task_begin, "tracer");

        if (hook_enabled("tracer::on_task_end", "whether to trace when a task ends"))
            spec->on_task_end.put_back(tracer_on_task_end, "tracer");

        if (hook_enabled("tracer::on_task_cancelled", "whether to trace when a task is cancelled"))
            spec->on_task_cancelled.put_back(tracer_on_task_cancelled, "tracer");

        if (hook_enabled("tracer::on_task_wait_pre", "whether to trace when a task is to be wait"))
            spec->on_task_wait_pre.put_back(tracer_on_task_wait_pre, "tracer");

        if (hook_enabled("tracer::on_task_wait_post", "whether to trace when a task is wait post"))
            spec->on_task_wait_post.put_back(tracer_on_task_wait_post, "tracer");

        if (hook_enabled("tracer::on_task_cancel_post", "whether to trace when a task is cancel post"))
            spec->on_task_cancel_post.put_back(tracer_on_task_cancel_post, "tracer");

        // ---- aio ----
        if (hook_enabled("tracer::on_aio_call", "whether to trace when an aio task is called"))
            spec->on_aio_call.put_back(tracer_on_aio_call, "tracer");

        if (hook_enabled("tracer::on_aio_enqueue", "whether to trace when an aio task is enqueued"))
            spec->on_aio_enqueue.put_back(tracer_on_aio_enqueue, "tracer");

        // ---- rpc ----
        if (hook_enabled("tracer::on_rpc_call", "whether to trace when a rpc is made"))
            spec->on_rpc_call.put_back(tracer_on_rpc_call, "tracer");

        if (hook_enabled("tracer::on_rpc_request_enqueue", "whether to trace when a rpc request task is enqueued"))
            spec->on_rpc_request_enqueue.put_back(tracer_on_rpc_request_enqueue, "tracer");

        if (hook_enabled("tracer::on_rpc_reply", "whether to trace when reply a rpc request"))
            spec->on_rpc_reply.put_back(tracer_on_rpc_reply, "tracer");

        if (hook_enabled("tracer::on_rpc_response_enqueue", "whetehr to trace when a rpc response task is enqueued"))
            spec->on_rpc_response_enqueue.put_back(tracer_on_rpc_response_enqueue, "tracer");

        if (hook_enabled("tracer::on_rpc_create_response", "whetehr to trace when a rpc response task is created"))
            spec->on_rpc_create_response.put_back(tracer_on_rpc_create_response, "tracer");
    }

    // Register the "tracer.find" command, handled by tracer_log_flow.
    command_manager::instance().register_command(
        {"tracer.find"},
        "tracer.find - find related logs",
        "tracer.find forward|f|backward|b rpc|r|task|t trace_id|task_id(e.g., "
        "a023003920302390) log_file_name(log.xx.txt)",
        tracer_log_flow);
}