Exemplo n.º 1
0
// Loads every replication option from the "replication" config section.
//
// Each option's current member value is handed to the config reader as the
// default, so the member is overwritten with whatever the reader returns.
// Integer options are read through the uint64 accessor and converted to the
// member's width. After all options are read, read_meta_servers() and
// sanity_check() are invoked, in that order.
void replication_options::initialize()
{
    const char *const section = "replication";

    // ---- two phase commit -------------------------------------------------
    prepare_timeout_ms_for_secondaries = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "prepare_timeout_ms_for_secondaries",
        prepare_timeout_ms_for_secondaries,
        "timeout (ms) for prepare message to secondaries in two phase commit"));
    prepare_timeout_ms_for_potential_secondaries = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "prepare_timeout_ms_for_potential_secondaries",
        prepare_timeout_ms_for_potential_secondaries,
        "timeout (ms) for prepare message to potential secondaries in two phase commit"));

    batch_write_disabled = dsn_config_get_value_bool(
        section,
        "batch_write_disabled",
        batch_write_disabled,
        "whether to disable auto-batch of replicated write requests");
    staleness_for_commit = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "staleness_for_commit",
        staleness_for_commit,
        "how many concurrent two phase commit rounds are allowed"));
    max_mutation_count_in_prepare_list = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "max_mutation_count_in_prepare_list",
        max_mutation_count_in_prepare_list,
        "maximum number of mutations in prepare list"));
    mutation_2pc_min_replica_count = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "mutation_2pc_min_replica_count",
        mutation_2pc_min_replica_count,
        "minimum number of alive replicas under which write is allowed"));

    // ---- group check ------------------------------------------------------
    group_check_disabled = dsn_config_get_value_bool(
        section,
        "group_check_disabled",
        group_check_disabled,
        "whether group check is disabled");
    group_check_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "group_check_interval_ms",
        group_check_interval_ms,
        "every what period (ms) we check the replica healthness"));

    // ---- checkpoint -------------------------------------------------------
    checkpoint_disabled = dsn_config_get_value_bool(
        section,
        "checkpoint_disabled",
        checkpoint_disabled,
        "whether checkpoint is disabled");
    checkpoint_interval_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "checkpoint_interval_seconds",
        checkpoint_interval_seconds,
        "every what period (seconds) we do checkpoints for replicated apps"));
    // The decree gap is the only integer option kept at 64-bit width.
    checkpoint_min_decree_gap = static_cast<int64_t>(dsn_config_get_value_uint64(
        section,
        "checkpoint_min_decree_gap",
        checkpoint_min_decree_gap,
        "minimum decree gap that triggers checkpoint"));
    checkpoint_max_interval_hours = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "checkpoint_max_interval_hours",
        checkpoint_max_interval_hours,
        "maximum time interval (hours) where a new checkpoint must be created"));

    // ---- garbage collection -----------------------------------------------
    gc_disabled = dsn_config_get_value_bool(
        section,
        "gc_disabled",
        gc_disabled,
        "whether to disable garbage collection");
    gc_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "gc_interval_ms",
        gc_interval_ms,
        "every what period (ms) we do garbage collection for dead replicas, on-disk state, log, etc."));
    gc_memory_replica_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "gc_memory_replica_interval_ms",
        gc_memory_replica_interval_ms,
        "after closing a healthy replica (due to LB), the replica will remain in memory for this long (ms) for quick recover"));
    gc_disk_error_replica_interval_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "gc_disk_error_replica_interval_seconds",
        gc_disk_error_replica_interval_seconds,
        "error replica are deleted after they have been closed and lasted on disk this long (seconds)"));

    // ---- failure detection (FD) -------------------------------------------
    fd_disabled = dsn_config_get_value_bool(
        section,
        "fd_disabled",
        fd_disabled,
        "whether to disable failure detection");
    fd_check_interval_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "fd_check_interval_seconds",
        fd_check_interval_seconds,
        "every this period(seconds) the FD will check healthness of remote peers"));
    fd_beacon_interval_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "fd_beacon_interval_seconds",
        fd_beacon_interval_seconds,
        "every this period(seconds) the FD sends beacon message to remote peers"));
    fd_lease_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "fd_lease_seconds",
        fd_lease_seconds,
        "lease (seconds) get from remote FD master"));
    fd_grace_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "fd_grace_seconds",
        fd_grace_seconds,
        "grace (seconds) assigned to remote FD slaves (grace > lease)"));

    // ---- private log ------------------------------------------------------
    log_private_disabled = dsn_config_get_value_bool(
        section,
        "log_private_disabled",
        log_private_disabled,
        "whether to disable logging committed mutations for each app, which is used for easier learning");
    log_private_file_size_mb = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "log_private_file_size_mb",
        log_private_file_size_mb,
        "private log maximum segment file size (MB)"));
    log_private_batch_buffer_kb = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "log_private_batch_buffer_kb",
        log_private_batch_buffer_kb,
        "private log buffer size (KB) for batching incoming logs"));
    log_private_force_flush = dsn_config_get_value_bool(
        section,
        "log_private_force_flush",
        log_private_force_flush,
        "when write private log, whether to flush file after write done");

    // ---- shared log -------------------------------------------------------
    log_shared_file_size_mb = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "log_shared_file_size_mb",
        log_shared_file_size_mb,
        "shared log maximum segment file size (MB)"));
    // NOTE(review): this key does not follow the "log_shared_*" naming used by
    // the neighbouring options; kept as-is since existing config files may
    // depend on it -- confirm whether the spelling is intentional.
    log_shared_batch_buffer_kb = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "log_batch_buffer_KB_shared",
        log_shared_batch_buffer_kb,
        "shared log buffer size (KB) for batching incoming logs"));
    log_shared_force_flush = dsn_config_get_value_bool(
        section,
        "log_shared_force_flush",
        log_shared_force_flush,
        "when write shared log, whether to flush file after write done");

    // ---- configuration sync with the meta server --------------------------
    config_sync_disabled = dsn_config_get_value_bool(
        section,
        "config_sync_disabled",
        config_sync_disabled,
        "whether to disable replica configuration periodical sync with the meta server");
    config_sync_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "config_sync_interval_ms",
        config_sync_interval_ms,
        "every this period(ms) the replica syncs replica configuration with the meta server"));

    // ---- load balance -----------------------------------------------------
    lb_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "lb_interval_ms",
        lb_interval_ms,
        "every this period(ms) the meta server will do load balance"));

    read_meta_servers();

    sanity_check();
}
Exemplo n.º 2
0
// Loads every replication option from the "replication" config section.
//
// First resolves the application name/data directory and derives the shared
// log directory and the replica data directories from them. Then reads each
// scalar option, passing the member's current value as the default so the
// member is overwritten with whatever the reader returns. Finally calls
// read_meta_servers() and sanity_check(), in that order.
void replication_options::initialize()
{
    const char *const section = "replication";

    dsn_app_info app_info;
    bool r = dsn_get_current_app_info(&app_info);
    dassert(r, "get current app info failed");
    app_name = app_info.name;
    app_dir = app_info.data_dir;

    // Shared log directory:
    //   config[slog_dir] set   -> "<config[slog_dir]>/<app_name>/slog"
    //   config[slog_dir] empty -> "<app_dir>/slog"
    slog_dir = dsn_config_get_value_string(section, "slog_dir", "", "shared log directory");
    if (!slog_dir.empty())
    {
        slog_dir = utils::filesystem::path_combine(slog_dir, app_name);
    }
    else
    {
        slog_dir = app_dir;
    }
    slog_dir = utils::filesystem::path_combine(slog_dir, "slog");

    // Replica data directories (comma separated list in the config):
    //   config[data_dirs] set   -> "<each entry>/<app_name>/reps"
    //   config[data_dirs] empty -> "<app_dir>/reps"
    std::string configured_dirs =
        dsn_config_get_value_string(section, "data_dirs", "", "replica directory list");
    std::vector<std::string> roots;
    ::dsn::utils::split_args(configured_dirs.c_str(), roots, ',');
    if (!roots.empty())
    {
        for (auto& root : roots)
        {
            root = utils::filesystem::path_combine(root, app_name);
        }
    }
    else
    {
        roots.push_back(app_dir);
    }
    for (auto& root : roots)
    {
        data_dirs.push_back(utils::filesystem::path_combine(root, "reps"));
    }

    // ---- two phase commit -------------------------------------------------
    prepare_timeout_ms_for_secondaries = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "prepare_timeout_ms_for_secondaries",
        prepare_timeout_ms_for_secondaries,
        "timeout (ms) for prepare message to secondaries in two phase commit"));
    prepare_timeout_ms_for_potential_secondaries = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "prepare_timeout_ms_for_potential_secondaries",
        prepare_timeout_ms_for_potential_secondaries,
        "timeout (ms) for prepare message to potential secondaries in two phase commit"));

    batch_write_disabled = dsn_config_get_value_bool(
        section,
        "batch_write_disabled",
        batch_write_disabled,
        "whether to disable auto-batch of replicated write requests");
    staleness_for_commit = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "staleness_for_commit",
        staleness_for_commit,
        "how many concurrent two phase commit rounds are allowed"));
    max_mutation_count_in_prepare_list = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "max_mutation_count_in_prepare_list",
        max_mutation_count_in_prepare_list,
        "maximum number of mutations in prepare list"));
    mutation_2pc_min_replica_count = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "mutation_2pc_min_replica_count",
        mutation_2pc_min_replica_count,
        "minimum number of alive replicas under which write is allowed"));

    // ---- group check ------------------------------------------------------
    group_check_disabled = dsn_config_get_value_bool(
        section,
        "group_check_disabled",
        group_check_disabled,
        "whether group check is disabled");
    group_check_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "group_check_interval_ms",
        group_check_interval_ms,
        "every what period (ms) we check the replica healthness"));

    // ---- checkpoint -------------------------------------------------------
    checkpoint_disabled = dsn_config_get_value_bool(
        section,
        "checkpoint_disabled",
        checkpoint_disabled,
        "whether checkpoint is disabled");
    checkpoint_interval_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "checkpoint_interval_seconds",
        checkpoint_interval_seconds,
        "every what period (seconds) we do checkpoints for replicated apps"));
    // The decree gap is the only integer option kept at 64-bit width.
    checkpoint_min_decree_gap = static_cast<int64_t>(dsn_config_get_value_uint64(
        section,
        "checkpoint_min_decree_gap",
        checkpoint_min_decree_gap,
        "minimum decree gap that triggers checkpoint"));
    checkpoint_max_interval_hours = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "checkpoint_max_interval_hours",
        checkpoint_max_interval_hours,
        "maximum time interval (hours) where a new checkpoint must be created"));

    // ---- garbage collection -----------------------------------------------
    gc_disabled = dsn_config_get_value_bool(
        section,
        "gc_disabled",
        gc_disabled,
        "whether to disable garbage collection");
    gc_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "gc_interval_ms",
        gc_interval_ms,
        "every what period (ms) we do garbage collection for dead replicas, on-disk state, log, etc."));
    gc_memory_replica_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "gc_memory_replica_interval_ms",
        gc_memory_replica_interval_ms,
        "after closing a healthy replica (due to LB), the replica will remain in memory for this long (ms) for quick recover"));
    gc_disk_error_replica_interval_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "gc_disk_error_replica_interval_seconds",
        gc_disk_error_replica_interval_seconds,
        "error replica are deleted after they have been closed and lasted on disk this long (seconds)"));

    // ---- failure detection (FD) -------------------------------------------
    fd_disabled = dsn_config_get_value_bool(
        section,
        "fd_disabled",
        fd_disabled,
        "whether to disable failure detection");
    fd_check_interval_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "fd_check_interval_seconds",
        fd_check_interval_seconds,
        "every this period(seconds) the FD will check healthness of remote peers"));
    fd_beacon_interval_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "fd_beacon_interval_seconds",
        fd_beacon_interval_seconds,
        "every this period(seconds) the FD sends beacon message to remote peers"));
    fd_lease_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "fd_lease_seconds",
        fd_lease_seconds,
        "lease (seconds) get from remote FD master"));
    fd_grace_seconds = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "fd_grace_seconds",
        fd_grace_seconds,
        "grace (seconds) assigned to remote FD slaves (grace > lease)"));

    // ---- private log ------------------------------------------------------
    log_private_disabled = dsn_config_get_value_bool(
        section,
        "log_private_disabled",
        log_private_disabled,
        "whether to disable logging committed mutations for each app, which is used for easier learning");
    log_private_file_size_mb = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "log_private_file_size_mb",
        log_private_file_size_mb,
        "private log maximum segment file size (MB)"));
    log_private_batch_buffer_kb = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "log_private_batch_buffer_kb",
        log_private_batch_buffer_kb,
        "private log buffer size (KB) for batching incoming logs"));
    log_private_force_flush = dsn_config_get_value_bool(
        section,
        "log_private_force_flush",
        log_private_force_flush,
        "when write private log, whether to flush file after write done");

    // ---- shared log -------------------------------------------------------
    log_shared_file_size_mb = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "log_shared_file_size_mb",
        log_shared_file_size_mb,
        "shared log maximum segment file size (MB)"));
    // NOTE(review): this key does not follow the "log_shared_*" naming used by
    // the neighbouring options; kept as-is since existing config files may
    // depend on it -- confirm whether the spelling is intentional.
    log_shared_batch_buffer_kb = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "log_batch_buffer_KB_shared",
        log_shared_batch_buffer_kb,
        "shared log buffer size (KB) for batching incoming logs"));
    log_shared_force_flush = dsn_config_get_value_bool(
        section,
        "log_shared_force_flush",
        log_shared_force_flush,
        "when write shared log, whether to flush file after write done");

    // ---- configuration sync with the meta server --------------------------
    config_sync_disabled = dsn_config_get_value_bool(
        section,
        "config_sync_disabled",
        config_sync_disabled,
        "whether to disable replica configuration periodical sync with the meta server");
    config_sync_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "config_sync_interval_ms",
        config_sync_interval_ms,
        "every this period(ms) the replica syncs replica configuration with the meta server"));

    // ---- load balance -----------------------------------------------------
    lb_interval_ms = static_cast<int>(dsn_config_get_value_uint64(
        section,
        "lb_interval_ms",
        lb_interval_ms,
        "every this period(ms) the meta server will do load balance"));

    // ---- empty write ------------------------------------------------------
    write_empty_enabled = dsn_config_get_value_bool(
        section,
        "write_empty_enabled",
        write_empty_enabled,
        "whether to enable empty write when no write requests are processed for more than group_check_period, default is true");

    read_meta_servers();

    sanity_check();
}
Exemplo n.º 3
0
 // Builds a screen logger on top of the base logging provider and reads the
 // "short_header" switch from the [tools.screen_logger] config section
 // (the reader is given `true` as the default).
 screen_logger::screen_logger(const char* log_dir, logging_provider* inner)
     : logging_provider(log_dir, inner)
 {
     const bool use_short_header = dsn_config_get_value_bool(
         "tools.screen_logger",
         "short_header",
         true,
         "whether to use short header (excluding file/function etc.)");
     _short_header = use_short_header;
 }
Exemplo n.º 4
0
        // Builds a file-backed logger rooted at `log_dir`.
        //
        // Reads its options from the [tools.simple_logger] config section,
        // scans `log_dir` for existing "log.<N>.txt" files to find the lowest
        // and highest index already present, picks the next index to write
        // to, and finally calls create_log_file().
        simple_logger::simple_logger(const char* log_dir, logging_provider* inner)
            : logging_provider(log_dir, inner)
        {
            const char* const section = "tools.simple_logger";

            _log_dir = std::string(log_dir);
            // Valid file indices are assumed to be positive, so 0 in
            // _start_index means "no existing log file seen yet".
            _start_index = 0;
            _index = 1;
            _lines = 0;
            _log = nullptr;

            _short_header = dsn_config_get_value_bool(
                section,
                "short_header",
                true,
                "whether to use short header (excluding file/function etc.)");
            _fast_flush = dsn_config_get_value_bool(
                section,
                "fast_flush",
                false,
                "whether to flush immediately");

            // The level is configured by name; an unknown name maps to
            // LOG_LEVEL_INVALID, which the assertion below rejects.
            auto level_name = dsn_config_get_value_string(
                section,
                "stderr_start_level",
                enum_to_string(LOG_LEVEL_WARNING),
                "copy log messages at or above this level to stderr in addition to logfiles");
            _stderr_start_level = enum_from_string(level_name, LOG_LEVEL_INVALID);
            dassert(_stderr_start_level != LOG_LEVEL_INVALID,
                    "invalid [tools.simple_logger] stderr_start_level specified");

            _max_number_of_log_files_on_disk = dsn_config_get_value_uint64(
                section,
                "max_number_of_log_files_on_disk",
                20,
                "max number of log files reserved on disk, older logs are auto deleted");

            // Inspect the log files that are already on disk.
            std::vector<std::string> existing_files;
            if (!dsn::utils::filesystem::get_subfiles(_log_dir, existing_files, false))
            {
                dassert(false, "Fail to get subfiles in %s.", _log_dir.c_str());
            }
            for (auto& file_path : existing_files)
            {
                const auto file_name = dsn::utils::filesystem::get_file_name(file_path);

                // Only names longer than 8 characters that begin with "log."
                // are candidates.
                if (file_name.length() < 9 || file_name.compare(0, 4, "log.") != 0)
                    continue;

                int file_index = 0;
                const int parsed = sscanf(file_name.c_str(), "log.%d.txt", &file_index);
                if (parsed != 1 || file_index <= 0)
                    continue;

                // Track the highest index seen ...
                if (file_index > _index)
                    _index = file_index;

                // ... and the lowest one.
                if (_start_index == 0 || file_index < _start_index)
                    _start_index = file_index;
            }
            existing_files.clear();

            if (_start_index != 0)
            {
                // Existing files were found: write to the one after the newest.
                ++_index;
            }
            else
            {
                // Nothing on disk yet: the first file is also the oldest.
                _start_index = _index;
            }

            create_log_file();
        }
Exemplo n.º 5
0
// Installs the tracer hooks on every task code and registers the
// "tracer.find" command.
//
// For each valid task code, the [task.<code name>] config section decides
// whether the task is traced at all ("is_trace", defaulting to the value read
// from [task..default]) and, if so, which individual join points get a tracer
// callback appended (each "tracer::on_*" key, defaulting to true).
//
// The `spec` parameter is not used by this function.
void tracer::install(service_spec &spec)
{
    const auto trace_by_default = dsn_config_get_value_bool(
        "task..default", "is_trace", false, "whether to trace tasks by default");

    for (int code = 0; code <= dsn::task_code::max(); ++code) {
        if (code == TASK_CODE_INVALID)
            continue;

        const std::string section_name =
            std::string("task.") + std::string(dsn::task_code(code).to_string());

        // NOTE: this local intentionally keeps its original name, which
        // shadows the (unused) function parameter within the loop body.
        task_spec *spec = task_spec::get(code);
        dassert(spec != nullptr, "task_spec cannot be null");

        const bool traced = dsn_config_get_value_bool(
            section_name.c_str(), "is_trace", trace_by_default, "whether to trace this kind of task");
        if (!traced)
            continue;

        // Reads one per-hook switch from this task's section; every hook is
        // enabled unless the config says otherwise.
        auto hook_enabled = [&section_name](const char *key, const char *description) {
            return dsn_config_get_value_bool(section_name.c_str(), key, true, description);
        };

        if (hook_enabled("tracer::on_task_create", "whether to trace when a task is created"))
            spec->on_task_create.put_back(tracer_on_task_create, "tracer");

        if (hook_enabled("tracer::on_task_enqueue",
                         "whether to trace when a timer or async task is enqueued"))
            spec->on_task_enqueue.put_back(tracer_on_task_enqueue, "tracer");

        if (hook_enabled("tracer::on_task_begin", "whether to trace when a task begins"))
            spec->on_task_begin.put_back(tracer_on_task_begin, "tracer");

        if (hook_enabled("tracer::on_task_end", "whether to trace when a task ends"))
            spec->on_task_end.put_back(tracer_on_task_end, "tracer");

        if (hook_enabled("tracer::on_task_cancelled", "whether to trace when a task is cancelled"))
            spec->on_task_cancelled.put_back(tracer_on_task_cancelled, "tracer");

        if (hook_enabled("tracer::on_task_wait_pre", "whether to trace when a task is to be wait"))
            spec->on_task_wait_pre.put_back(tracer_on_task_wait_pre, "tracer");

        if (hook_enabled("tracer::on_task_wait_post", "whether to trace when a task is wait post"))
            spec->on_task_wait_post.put_back(tracer_on_task_wait_post, "tracer");

        if (hook_enabled("tracer::on_task_cancel_post",
                         "whether to trace when a task is cancel post"))
            spec->on_task_cancel_post.put_back(tracer_on_task_cancel_post, "tracer");

        if (hook_enabled("tracer::on_aio_call", "whether to trace when an aio task is called"))
            spec->on_aio_call.put_back(tracer_on_aio_call, "tracer");

        if (hook_enabled("tracer::on_aio_enqueue", "whether to trace when an aio task is enqueued"))
            spec->on_aio_enqueue.put_back(tracer_on_aio_enqueue, "tracer");

        if (hook_enabled("tracer::on_rpc_call", "whether to trace when a rpc is made"))
            spec->on_rpc_call.put_back(tracer_on_rpc_call, "tracer");

        if (hook_enabled("tracer::on_rpc_request_enqueue",
                         "whether to trace when a rpc request task is enqueued"))
            spec->on_rpc_request_enqueue.put_back(tracer_on_rpc_request_enqueue, "tracer");

        if (hook_enabled("tracer::on_rpc_reply", "whether to trace when reply a rpc request"))
            spec->on_rpc_reply.put_back(tracer_on_rpc_reply, "tracer");

        if (hook_enabled("tracer::on_rpc_response_enqueue",
                         "whetehr to trace when a rpc response task is enqueued"))
            spec->on_rpc_response_enqueue.put_back(tracer_on_rpc_response_enqueue, "tracer");

        if (hook_enabled("tracer::on_rpc_create_response",
                         "whetehr to trace when a rpc response task is created"))
            spec->on_rpc_create_response.put_back(tracer_on_rpc_create_response, "tracer");
    }

    // Expose log-flow lookup through the command manager.
    command_manager::instance().register_command(
        {"tracer.find"},
        "tracer.find - find related logs",
        "tracer.find forward|f|backward|b rpc|r|task|t trace_id|task_id(e.g., "
        "a023003920302390) log_file_name(log.xx.txt)",
        tracer_log_flow);
}