Esempio n. 1
0
void server_state::init_app()
{
    zauto_write_lock l(_lock);
    if (_apps.size() > 0)
        return;

    app_state app;
    app.app_id = 1;
    app.app_name = dsn_config_get_value_string("replication.app",
        "app_name", "", "replication app name");
    dassert(app.app_name.length() > 0, "'[replication.app] app_name' not specified");
    app.app_type = dsn_config_get_value_string("replication.app",
        "app_type", "", "replication app type-name");
    dassert(app.app_type.length() > 0, "'[replication.app] app_type' not specified");
    app.partition_count = (int)dsn_config_get_value_uint64("replication.app", 
        "partition_count", 1, "how many partitions the app should have");

    int32_t max_replica_count = (int)dsn_config_get_value_uint64("replication.app",
        "max_replica_count", 3, "maximum replica count for each partition");
    for (int i = 0; i < app.partition_count; i++)
    {
        partition_configuration ps;
        ps.app_type = app.app_type;
        ps.ballot = 0;
        ps.gpid.app_id = app.app_id;
        ps.gpid.pidx = i;
        ps.last_committed_decree = 0;
        ps.max_replica_count = max_replica_count;
        ps.primary.set_invalid();
        
        app.partitions.push_back(ps);
    }
    
    _apps.push_back(app);
}
Esempio n. 2
0
    // Builds the simulated network provider: binds it to the fixed address
    // localhost:1 and loads the message delay bounds (microseconds) from the
    // [tools.simulator] configuration section.
    sim_network_provider::sim_network_provider(rpc_engine* rpc, network* inner_provider)
        : connection_oriented_network(rpc, inner_provider)
    {
        _address.assign_ipv4("localhost", 1);

        // Defaults handed to the configuration lookup for each bound.
        const uint32_t default_min_delay_us = 1;
        const uint32_t default_max_delay_us = 100000;

        _min_message_delay_microseconds = static_cast<uint32_t>(dsn_config_get_value_uint64(
            "tools.simulator",
            "min_message_delay_microseconds",
            default_min_delay_us,
            "min message delay (us)"));

        _max_message_delay_microseconds = static_cast<uint32_t>(dsn_config_get_value_uint64(
            "tools.simulator",
            "max_message_delay_microseconds",
            default_max_delay_us,
            "max message delay (us)"));
    }
Esempio n. 3
0
 // Sets up the base network object: remembers the owning rpc engine, selects
 // NET_HDR_DSN as the parser type, fixes the buffer sizing constants and
 // loads the send-queue throttling threshold from the [network] section.
 // inner_provider is not used by this constructor.
 network::network(rpc_engine* srv, network* inner_provider)
     : _engine(srv), _parser_type(NET_HDR_DSN)
 {
     _message_buffer_block_size = 1024 * 64;

     // TODO: windows, how about the other platforms?
     _max_buffer_block_count_per_send = 64;

     const auto configured_threshold = dsn_config_get_value_uint64(
         "network",
         "send_queue_threshold",
         4 * 1024,
         "send queue size above which throttling is applied");
     _send_queue_threshold = static_cast<int>(configured_threshold);
 }
Esempio n. 4
0
// Creates the simulator environment provider: puts on_worker_start at the
// front of task_worker::on_start and picks the random seed for this run.
//
// The seed comes from [tools.simulator] random_seed; a value of 0 means
// "choose one", in which case a value drawn from std::random_device is used.
// The chosen seed is always written to the log through derror.
sim_env_provider::sim_env_provider(env_provider* inner_provider)
    : env_provider(inner_provider)
{
    task_worker::on_start.put_front(on_worker_start, "sim_env_provider::on_worker_start");

    _seed = static_cast<int>(dsn_config_get_value_uint64(
        "tools.simulator",
        "random_seed",
        0,
        "random seed for the simulator, 0 for random random seed"));

    if (0 == _seed)
    {
        std::random_device entropy_source;
        _seed = entropy_source();
    }

    derror("simulation.random seed for this round is %d", _seed);
}
Esempio n. 5
0
    // Sets up the base network object: remembers the owning rpc engine, uses
    // NET_HDR_DSN as the client header format, fixes the buffer sizing
    // constants, and loads two settings from the [network] section - the
    // send-queue throttling threshold and the header format to assume for
    // unknown messages (NET_HDR_INVALID when not configured or not parsable).
    // inner_provider is not used by this constructor.
    network::network(rpc_engine* srv, network* inner_provider)
        : _engine(srv), _client_hdr_format(NET_HDR_DSN), _unknown_msg_header_format(NET_HDR_INVALID)
    {
        _message_buffer_block_size = 1024 * 64;

        // TODO: windows, how about the other platforms?
        _max_buffer_block_count_per_send = 64;

        const auto configured_threshold = dsn_config_get_value_uint64(
            "network",
            "send_queue_threshold",
            4 * 1024,
            "send queue size above which throttling is applied");
        _send_queue_threshold = static_cast<int>(configured_threshold);

        // The lookup and the parse stay in one expression so that whatever
        // to_string() yields lives until from_string() has consumed it.
        _unknown_msg_header_format = network_header_format::from_string(
            dsn_config_get_value_string(
                "network",
                "unknown_message_header_format",
                NET_HDR_INVALID.to_string(),
                "format for unknown message headers, default is NET_HDR_INVALID"),
            NET_HDR_INVALID);
    }
Esempio n. 6
0
// Refreshes every tunable of replication_options from the [replication]
// configuration section, then calls read_meta_servers() and sanity_check().
//
// For each setting the member's current value is handed to the configuration
// lookup as the default, and the member is overwritten with whatever the
// lookup returns.
void replication_options::initialize()
{
    // Unsigned integer setting from [replication], narrowed to int.
    auto read_int = [](const char* key, uint64_t fallback, const char* description)
    {
        return (int)dsn_config_get_value_uint64("replication", key, fallback, description);
    };

    // Boolean setting from [replication].
    auto read_flag = [](const char* key, bool fallback, const char* description)
    {
        return dsn_config_get_value_bool("replication", key, fallback, description);
    };

    // --- two phase commit ---
    prepare_timeout_ms_for_secondaries = read_int(
        "prepare_timeout_ms_for_secondaries",
        prepare_timeout_ms_for_secondaries,
        "timeout (ms) for prepare message to secondaries in two phase commit");
    prepare_timeout_ms_for_potential_secondaries = read_int(
        "prepare_timeout_ms_for_potential_secondaries",
        prepare_timeout_ms_for_potential_secondaries,
        "timeout (ms) for prepare message to potential secondaries in two phase commit");

    batch_write_disabled = read_flag(
        "batch_write_disabled",
        batch_write_disabled,
        "whether to disable auto-batch of replicated write requests");
    staleness_for_commit = read_int(
        "staleness_for_commit",
        staleness_for_commit,
        "how many concurrent two phase commit rounds are allowed");
    max_mutation_count_in_prepare_list = read_int(
        "max_mutation_count_in_prepare_list",
        max_mutation_count_in_prepare_list,
        "maximum number of mutations in prepare list");
    mutation_2pc_min_replica_count = read_int(
        "mutation_2pc_min_replica_count",
        mutation_2pc_min_replica_count,
        "minimum number of alive replicas under which write is allowed");

    // --- group check ---
    group_check_disabled = read_flag(
        "group_check_disabled",
        group_check_disabled,
        "whether group check is disabled");
    group_check_interval_ms = read_int(
        "group_check_interval_ms",
        group_check_interval_ms,
        "every what period (ms) we check the replica healthness");

    // --- checkpoint ---
    checkpoint_disabled = read_flag(
        "checkpoint_disabled",
        checkpoint_disabled,
        "whether checkpoint is disabled");
    checkpoint_interval_seconds = read_int(
        "checkpoint_interval_seconds",
        checkpoint_interval_seconds,
        "every what period (seconds) we do checkpoints for replicated apps");
    // The only setting kept at 64-bit width, so it bypasses read_int.
    checkpoint_min_decree_gap = (int64_t)dsn_config_get_value_uint64(
        "replication",
        "checkpoint_min_decree_gap",
        checkpoint_min_decree_gap,
        "minimum decree gap that triggers checkpoint");
    checkpoint_max_interval_hours = read_int(
        "checkpoint_max_interval_hours",
        checkpoint_max_interval_hours,
        "maximum time interval (hours) where a new checkpoint must be created");

    // --- garbage collection ---
    gc_disabled = read_flag(
        "gc_disabled",
        gc_disabled,
        "whether to disable garbage collection");
    gc_interval_ms = read_int(
        "gc_interval_ms",
        gc_interval_ms,
        "every what period (ms) we do garbage collection for dead replicas, on-disk state, log, etc.");
    gc_memory_replica_interval_ms = read_int(
        "gc_memory_replica_interval_ms",
        gc_memory_replica_interval_ms,
        "after closing a healthy replica (due to LB), the replica will remain in memory for this long (ms) for quick recover");
    gc_disk_error_replica_interval_seconds = read_int(
        "gc_disk_error_replica_interval_seconds",
        gc_disk_error_replica_interval_seconds,
        "error replica are deleted after they have been closed and lasted on disk this long (seconds)");

    // --- failure detection ---
    fd_disabled = read_flag(
        "fd_disabled",
        fd_disabled,
        "whether to disable failure detection");
    fd_check_interval_seconds = read_int(
        "fd_check_interval_seconds",
        fd_check_interval_seconds,
        "every this period(seconds) the FD will check healthness of remote peers");
    fd_beacon_interval_seconds = read_int(
        "fd_beacon_interval_seconds",
        fd_beacon_interval_seconds,
        "every this period(seconds) the FD sends beacon message to remote peers");
    fd_lease_seconds = read_int(
        "fd_lease_seconds",
        fd_lease_seconds,
        "lease (seconds) get from remote FD master");
    fd_grace_seconds = read_int(
        "fd_grace_seconds",
        fd_grace_seconds,
        "grace (seconds) assigned to remote FD slaves (grace > lease)");

    // --- private log ---
    log_private_disabled = read_flag(
        "log_private_disabled",
        log_private_disabled,
        "whether to disable logging committed mutations for each app, which is used for easier learning");
    log_private_file_size_mb = read_int(
        "log_private_file_size_mb",
        log_private_file_size_mb,
        "private log maximum segment file size (MB)");
    log_private_batch_buffer_kb = read_int(
        "log_private_batch_buffer_kb",
        log_private_batch_buffer_kb,
        "private log buffer size (KB) for batching incoming logs");
    log_private_force_flush = read_flag(
        "log_private_force_flush",
        log_private_force_flush,
        "when write private log, whether to flush file after write done");

    // --- shared log ---
    log_shared_file_size_mb = read_int(
        "log_shared_file_size_mb",
        log_shared_file_size_mb,
        "shared log maximum segment file size (MB)");
    // NOTE(review): this key does not follow the "log_shared_*" naming of its
    // siblings; it is kept verbatim because existing config files may use it.
    log_shared_batch_buffer_kb = read_int(
        "log_batch_buffer_KB_shared",
        log_shared_batch_buffer_kb,
        "shared log buffer size (KB) for batching incoming logs");
    log_shared_force_flush = read_flag(
        "log_shared_force_flush",
        log_shared_force_flush,
        "when write shared log, whether to flush file after write done");

    // --- configuration sync with the meta server ---
    config_sync_disabled = read_flag(
        "config_sync_disabled",
        config_sync_disabled,
        "whether to disable replica configuration periodical sync with the meta server");
    config_sync_interval_ms = read_int(
        "config_sync_interval_ms",
        config_sync_interval_ms,
        "every this period(ms) the replica syncs replica configuration with the meta server");

    // --- load balance ---
    lb_interval_ms = read_int(
        "lb_interval_ms",
        lb_interval_ms,
        "every this period(ms) the meta server will do load balance");

    read_meta_servers();

    sanity_check();
}
Esempio n. 7
0
void replication_options::initialize()
{
    dsn_app_info app_info;
    bool r = dsn_get_current_app_info(&app_info);
    dassert(r, "get current app info failed");
    app_name = app_info.name;
    app_dir = app_info.data_dir;

    // slog_dir:
    // - if config[slog_dir] is empty: "app_dir/slog"
    // - else: "config[slog_dir]/app_name/slog"
    slog_dir = dsn_config_get_value_string("replication", "slog_dir", "", "shared log directory");
    if (slog_dir.empty())
    {
        slog_dir = app_dir;
    }
    else
    {
        slog_dir = utils::filesystem::path_combine(slog_dir, app_name);
    }
    slog_dir = utils::filesystem::path_combine(slog_dir, "slog");

    // data_dirs
    // - if config[data_dirs] is empty: "app_dir/reps"
    // - else: "config[data_dirs]/app_name/reps"
    std::string dirs_str = dsn_config_get_value_string("replication", "data_dirs", "", "replica directory list");
    std::vector<std::string> dirs;
    ::dsn::utils::split_args(dirs_str.c_str(), dirs, ',');
    if (dirs.empty())
    {
        dirs.push_back(app_dir);
    }
    else
    {
        for (auto& dir : dirs)
        {
            dir = utils::filesystem::path_combine(dir, app_name);
        }
    }
    for (auto& dir : dirs)
    {
        data_dirs.push_back(utils::filesystem::path_combine(dir, "reps"));
    }

    prepare_timeout_ms_for_secondaries =
        (int)dsn_config_get_value_uint64("replication", 
        "prepare_timeout_ms_for_secondaries", 
        prepare_timeout_ms_for_secondaries,
        "timeout (ms) for prepare message to secondaries in two phase commit"
        );
    prepare_timeout_ms_for_potential_secondaries = 
        (int)dsn_config_get_value_uint64("replication", 
        "prepare_timeout_ms_for_potential_secondaries",
        prepare_timeout_ms_for_potential_secondaries,
        "timeout (ms) for prepare message to potential secondaries in two phase commit"
        );

    batch_write_disabled =
        dsn_config_get_value_bool("replication",
        "batch_write_disabled",
        batch_write_disabled,
        "whether to disable auto-batch of replicated write requests"
        );
    staleness_for_commit =
        (int)dsn_config_get_value_uint64("replication", 
        "staleness_for_commit", 
        staleness_for_commit,
        "how many concurrent two phase commit rounds are allowed"
        );
    max_mutation_count_in_prepare_list =
        (int)dsn_config_get_value_uint64("replication", 
        "max_mutation_count_in_prepare_list", 
        max_mutation_count_in_prepare_list,
        "maximum number of mutations in prepare list"
        );
    mutation_2pc_min_replica_count =
        (int)dsn_config_get_value_uint64("replication", 
        "mutation_2pc_min_replica_count",
        mutation_2pc_min_replica_count,
        "minimum number of alive replicas under which write is allowed"
        );

    group_check_disabled =
        dsn_config_get_value_bool("replication",
        "group_check_disabled",
        group_check_disabled,
        "whether group check is disabled"
        );
    group_check_interval_ms =
        (int)dsn_config_get_value_uint64("replication",
        "group_check_interval_ms", 
        group_check_interval_ms,
        "every what period (ms) we check the replica healthness"
        );

    checkpoint_disabled =
        dsn_config_get_value_bool("replication",
        "checkpoint_disabled",
        checkpoint_disabled,
        "whether checkpoint is disabled"
        );
    checkpoint_interval_seconds =
        (int)dsn_config_get_value_uint64("replication",
        "checkpoint_interval_seconds",
        checkpoint_interval_seconds,
        "every what period (seconds) we do checkpoints for replicated apps"
        ); 
    checkpoint_min_decree_gap = 
        (int64_t)dsn_config_get_value_uint64("replication",
        "checkpoint_min_decree_gap",
        checkpoint_min_decree_gap,
        "minimum decree gap that triggers checkpoint"
        );
    checkpoint_max_interval_hours = 
        (int)dsn_config_get_value_uint64("replication",
        "checkpoint_max_interval_hours",
        checkpoint_max_interval_hours,
        "maximum time interval (hours) where a new checkpoint must be created"
        );

    gc_disabled =
        dsn_config_get_value_bool("replication",
        "gc_disabled",
        gc_disabled,
        "whether to disable garbage collection"
        );
    gc_interval_ms =
        (int)dsn_config_get_value_uint64("replication", 
        "gc_interval_ms", 
        gc_interval_ms,
        "every what period (ms) we do garbage collection for dead replicas, on-disk state, log, etc."
        );
    gc_memory_replica_interval_ms =
        (int)dsn_config_get_value_uint64("replication", 
        "gc_memory_replica_interval_ms", 
        gc_memory_replica_interval_ms,
        "after closing a healthy replica (due to LB), the replica will remain in memory for this long (ms) for quick recover"
        );
    gc_disk_error_replica_interval_seconds =
        (int)dsn_config_get_value_uint64("replication", 
        "gc_disk_error_replica_interval_seconds", 
        gc_disk_error_replica_interval_seconds,
        "error replica are deleted after they have been closed and lasted on disk this long (seconds)"
        );

    fd_disabled =
        dsn_config_get_value_bool("replication",
        "fd_disabled",
        fd_disabled,
        "whether to disable failure detection"
        );
    fd_check_interval_seconds =
        (int)dsn_config_get_value_uint64("replication", 
        "fd_check_interval_seconds", 
        fd_check_interval_seconds,
        "every this period(seconds) the FD will check healthness of remote peers"
        );
    fd_beacon_interval_seconds =
        (int)dsn_config_get_value_uint64("replication", 
        "fd_beacon_interval_seconds", 
        fd_beacon_interval_seconds,
        "every this period(seconds) the FD sends beacon message to remote peers"
        );
    fd_lease_seconds =
        (int)dsn_config_get_value_uint64("replication", 
        "fd_lease_seconds", 
        fd_lease_seconds,
        "lease (seconds) get from remote FD master"
        );
    fd_grace_seconds =
        (int)dsn_config_get_value_uint64("replication", 
        "fd_grace_seconds", 
        fd_grace_seconds,
        "grace (seconds) assigned to remote FD slaves (grace > lease)"
        );

    log_private_disabled =
        dsn_config_get_value_bool("replication",
        "log_private_disabled",
        log_private_disabled,
        "whether to disable logging committed mutations for each app, which is used for easier learning"
        );
    log_private_file_size_mb =
        (int)dsn_config_get_value_uint64("replication",
        "log_private_file_size_mb",
        log_private_file_size_mb,
        "private log maximum segment file size (MB)"
        );
    log_private_batch_buffer_kb =
        (int)dsn_config_get_value_uint64("replication",
        "log_private_batch_buffer_kb",
        log_private_batch_buffer_kb,
        "private log buffer size (KB) for batching incoming logs"
        );
    log_private_force_flush =
        dsn_config_get_value_bool("replication",
        "log_private_force_flush",
        log_private_force_flush,
        "when write private log, whether to flush file after write done"
        );

    log_shared_file_size_mb =
        (int)dsn_config_get_value_uint64("replication", 
        "log_shared_file_size_mb",
        log_shared_file_size_mb,
        "shared log maximum segment file size (MB)"
        );
    log_shared_batch_buffer_kb =
        (int)dsn_config_get_value_uint64("replication", 
        "log_batch_buffer_KB_shared", 
        log_shared_batch_buffer_kb,
        "shared log buffer size (KB) for batching incoming logs"
        );
    log_shared_force_flush =
        dsn_config_get_value_bool("replication",
        "log_shared_force_flush",
        log_shared_force_flush,
        "when write shared log, whether to flush file after write done"
        );

    config_sync_disabled =
        dsn_config_get_value_bool("replication", 
        "config_sync_disabled",
        config_sync_disabled,
        "whether to disable replica configuration periodical sync with the meta server"
        );
    config_sync_interval_ms =
        (int)dsn_config_get_value_uint64("replication", 
        "config_sync_interval_ms", 
        config_sync_interval_ms,
        "every this period(ms) the replica syncs replica configuration with the meta server"
        );

    lb_interval_ms =
        (int)dsn_config_get_value_uint64("replication",
        "lb_interval_ms",
        lb_interval_ms,
        "every this period(ms) the meta server will do load balance"
        );

    write_empty_enabled =
        dsn_config_get_value_bool("replication",
            "write_empty_enabled",
            write_empty_enabled,
            "whether to enable empty write when no write requests are processed for more than group_check_period, default is true"
            );
    
    read_meta_servers();

    sanity_check();
}
Esempio n. 8
0
        // Creates a file logger rooted at log_dir.
        //
        // Loads the [tools.simple_logger] settings (short_header, fast_flush,
        // stderr_start_level, max_number_of_log_files_on_disk), scans log_dir
        // for files named "log.<N>.txt" to find the lowest and highest
        // existing N, and then opens a new log file via create_log_file().
        //
        // After the scan:
        //   - no existing log files: _start_index == _index == 1
        //   - otherwise: _start_index is the lowest N found and _index is
        //     the highest N found plus one.
        simple_logger::simple_logger(const char* log_dir, logging_provider* inner)
            : logging_provider(log_dir, inner)
        {
            _log_dir = log_dir;

            // we assume all valid entries are positive, so 0 in _start_index
            // means "nothing found yet"
            _start_index = 0;
            _index = 1;
            _lines = 0;
            _log = nullptr;

            _short_header = dsn_config_get_value_bool(
                "tools.simple_logger",
                "short_header",
                true,
                "whether to use short header (excluding file/function etc.)");
            _fast_flush = dsn_config_get_value_bool(
                "tools.simple_logger",
                "fast_flush",
                false,
                "whether to flush immediately");

            // The lookup and the parse stay in one expression so the default
            // string handed to the lookup outlives its use.
            _stderr_start_level = enum_from_string(
                dsn_config_get_value_string(
                    "tools.simple_logger",
                    "stderr_start_level",
                    enum_to_string(LOG_LEVEL_WARNING),
                    "copy log messages at or above this level to stderr in addition to logfiles"),
                LOG_LEVEL_INVALID);
            dassert(_stderr_start_level != LOG_LEVEL_INVALID,
                    "invalid [tools.simple_logger] stderr_start_level specified");

            _max_number_of_log_files_on_disk = dsn_config_get_value_uint64(
                "tools.simple_logger",
                "max_number_of_log_files_on_disk",
                20,
                "max number of log files reserved on disk, older logs are auto deleted");

            // Scan the directory for log files left by earlier runs.
            std::vector<std::string> dir_entries;
            const bool listed = dsn::utils::filesystem::get_subfiles(_log_dir, dir_entries, false);
            if (!listed)
            {
                dassert(false, "Fail to get subfiles in %s.", _log_dir.c_str());
            }

            for (const auto& entry_path : dir_entries)
            {
                const auto file_name = dsn::utils::filesystem::get_file_name(entry_path);

                // Cheap filter first: long enough and carrying the "log." prefix.
                const bool has_log_prefix =
                    file_name.length() > 8 && file_name.compare(0, 4, "log.") == 0;
                if (!has_log_prefix)
                    continue;

                int file_no = 0;
                const int fields_read = sscanf(file_name.c_str(), "log.%d.txt", &file_no);
                if (fields_read != 1 || file_no <= 0)
                    continue;

                // Track the highest number seen ...
                if (_index < file_no)
                    _index = file_no;

                // ... and the lowest one.
                if (_start_index == 0 || file_no < _start_index)
                    _start_index = file_no;
            }
            dir_entries.clear();

            if (_start_index != 0)
            {
                // Existing logs were found: continue after the newest one.
                ++_index;
            }
            else
            {
                _start_index = _index;
            }

            create_log_file();
        }