Example #1
 static void corrupt_data(message_ex* request, const std::string& corrupt_type)
 {
     if (corrupt_type == "header")
         replace_value(request->buffers, dsn_random32(0, sizeof(message_header)-1));
     else if (corrupt_type == "body")
         replace_value(request->buffers, dsn_random32(0, request->body_size()-1) + sizeof(message_header));
     else if (corrupt_type == "random")
         replace_value(request->buffers, dsn_random32(0, request->body_size() + sizeof(message_header) - 1));
     else
     {
         derror("try to inject an unknown data corrupt type: %s", corrupt_type.c_str());
     }
 }
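Every example on this page follows the same contract: dsn_random32(min, max) appears to return a uniformly distributed 32-bit value in the inclusive range [min, max], which is why callers consistently pass size - 1 as the upper bound. Below is a minimal sketch of that assumed contract together with the recurring pick-a-random-element pattern; the local generator is a stand-in, not the real rDSN implementation.

#include <cassert>
#include <cstdint>
#include <random>
#include <vector>

// Stand-in with the assumed semantics of dsn_random32: a uniform 32-bit value
// in the inclusive range [min, max]. The real function is provided by the rDSN
// runtime; this local version exists only to keep the sketch self-contained.
static uint32_t fake_dsn_random32(uint32_t min, uint32_t max)
{
    static thread_local std::mt19937 rng{std::random_device{}()};
    return std::uniform_int_distribution<uint32_t>(min, max)(rng);
}

// The usage pattern mirrored throughout these examples: index a container
// with (0, size - 1) to pick one element uniformly at random.
template <typename T>
const T& pick_random(const std::vector<T>& v)
{
    assert(!v.empty());
    return v[fake_dsn_random32(0, static_cast<uint32_t>(v.size()) - 1)];
}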
Example #2
void load_balancer::run_lb(partition_configuration& pc)
{
    if (_state->freezed())
        return;

    configuration_update_request proposal;
    proposal.config = pc;

    if (pc.primary.is_invalid())
    {
        if (pc.secondaries.size() > 0)
        {
            proposal.node = pc.secondaries[dsn_random32(0, static_cast<int>(pc.secondaries.size()) - 1)];
            proposal.type = CT_UPGRADE_TO_PRIMARY;
        }

        else if (pc.last_drops.size() == 0)
        {
            proposal.node = find_minimal_load_machine(true);
            proposal.type = CT_ASSIGN_PRIMARY;
        }

        // DDD
        else
        {
            proposal.node = *pc.last_drops.rbegin();
            proposal.type = CT_ASSIGN_PRIMARY;

            derror("%s.%d.%d enters DDD state, we are waiting for its last primary node %s to come back ...",
                pc.app_type.c_str(),
                pc.gpid.app_id,
                pc.gpid.pidx,
                proposal.node.to_string()
                );
        }

        if (proposal.node.is_invalid() == false)
        {
            send_proposal(proposal.node, proposal);
        }
    }

    else if (static_cast<int>(pc.secondaries.size()) + 1 < pc.max_replica_count)
    {
        proposal.type = CT_ADD_SECONDARY;
        proposal.node = find_minimal_load_machine(false);
        if (proposal.node.is_invalid() == false && 
            proposal.node != pc.primary &&
            std::find(pc.secondaries.begin(), pc.secondaries.end(), proposal.node) == pc.secondaries.end())
        {
            send_proposal(pc.primary, proposal);
        }
    }
    else
    {
        // it is healthy, nothing to do
    }
}
Example #3
 static void fault_on_rpc_response_enqueue(rpc_response_task* resp)
 {
     fj_opt& opt = s_fj_opts[resp->spec().code];
     if (resp->delay_milliseconds() == 0 && task_ext_for_fj::get(resp) == 0)
     {
         resp->set_delay(dsn_random32(opt.rpc_message_delay_ms_min, opt.rpc_message_delay_ms_max));
         task_ext_for_fj::get(resp) = 1; // ensure the fault delay is injected only once
     }
 }
Example #4
 static void fault_on_rpc_request_enqueue(rpc_request_task* callee)
 {
     fj_opt& opt = s_fj_opts[callee->spec().code];
     if (callee->delay_milliseconds() == 0 && task_ext_for_fj::get(callee) == 0)
     {
         callee->set_delay(dsn_random32(opt.rpc_message_delay_ms_min, opt.rpc_message_delay_ms_max));
         task_ext_for_fj::get(callee) = 1; // ensure the fault delay is injected only once
     }
 }
Example #5
void timer_task::enqueue()
{
    // enable timer randomization to avoid many timers firing simultaneously
    if (delay_milliseconds() == 0 && spec().randomize_timer_delay_if_zero)
    {
        set_delay(dsn_random32(0, _interval_milliseconds));
    }
    
    return task::enqueue();
}
Example #6
timer_task::timer_task(dsn_task_code_t code, dsn_task_handler_t cb, void* param, uint32_t interval_milliseconds, int hash, service_node* node)
    : task(code, hash, node), 
    _interval_milliseconds(interval_milliseconds),
    _cb(cb),
    _param(param)
{
    dassert (TASK_TYPE_COMPUTE == spec().type, "this must be a computation type task, please use DEFINE_TASK_CODE to define the task code");

    // enable timer randomization to avoid many timers firing simultaneously
    set_delay(dsn_random32(0, interval_milliseconds));
}
Example #7
void sim_task_queue::enqueue(task* t)
{
    dassert(0 == t->delay_milliseconds(), "delay time must be zero");
    if (_tasks.size() > 0)
    {
        do {
            int random_pos = dsn_random32(0, 1000000);
            auto pr = _tasks.insert(std::map<uint32_t, task*>::value_type(random_pos, t));
            if (pr.second) break;
        } while (true);
    }
    else
    {
        int random_pos = dsn_random32(0, 1000000);
        _tasks.insert(std::map<uint32_t, task*>::value_type(random_pos, t));
    }

    // for scheduling
    if (task::get_current_worker())
    {
        scheduler::instance().wait_schedule(true, true);
    }
}
Example #8
void server_state::switch_meta_primary()
{
    zauto_write_lock l(_meta_lock);
    if (1 == _meta_servers.size())
        return;

    while (true)
    {
        int r = dsn_random32(0, (uint32_t)_meta_servers.size() - 1);
        if (r != _leader_index)
        {
            _leader_index = r;
            return;
        }
    }
}
Example #9
::dsn::rpc_address load_balancer::find_minimal_load_machine(bool primaryOnly)
{
    std::vector<std::pair< ::dsn::rpc_address, int>> stats;

    for (auto it = _state->_nodes.begin(); it != _state->_nodes.end(); it++)
    {
        if (it->second.is_alive)
        {
            stats.push_back(std::make_pair(it->first, static_cast<int>(primaryOnly ? it->second.primaries.size()
                : it->second.partitions.size())));
        }
    }

    
    std::sort(stats.begin(), stats.end(), [](const std::pair< ::dsn::rpc_address, int>& l, const std::pair< ::dsn::rpc_address, int>& r)
    {
        return l.second < r.second || (l.second == r.second && l.first < r.first);
    });

    if (stats.empty())
    {
        return ::dsn::rpc_address();
    }

    if (s_lb_for_test)
    {
        // always use the first (minimal) one
        return stats[0].first;
    }

    int candidate_count = 1;
    int val = stats[0].second;

    for (size_t i = 1; i < stats.size(); i++)
    {
        if (stats[i].second > val)
            break;
        candidate_count++;
    }

    return stats[dsn_random32(0, candidate_count - 1)].first;
}
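The function above breaks ties among equally loaded nodes by counting how many candidates share the minimal load and indexing into them with dsn_random32(0, candidate_count - 1), so new assignments are spread instead of always landing on the first node in sort order. A condensed, self-contained sketch of just that tie-breaking step follows; the string node names and the local dsn_random32 stand-in are illustrative, not rDSN types.

#include <algorithm>
#include <cstdint>
#include <random>
#include <string>
#include <utility>
#include <vector>

// Stand-in so the sketch compiles on its own; the real dsn_random32 comes from rDSN.
static uint32_t dsn_random32(uint32_t min, uint32_t max)
{
    static thread_local std::mt19937 rng{std::random_device{}()};
    return std::uniform_int_distribution<uint32_t>(min, max)(rng);
}

// Pick one node uniformly among those that share the minimal load value.
static std::string pick_least_loaded(std::vector<std::pair<std::string, int>> stats)
{
    if (stats.empty())
        return std::string();

    std::sort(stats.begin(), stats.end(),
        [](const std::pair<std::string, int>& l, const std::pair<std::string, int>& r)
        {
            return l.second < r.second || (l.second == r.second && l.first < r.first);
        });

    // count the leading entries that tie with the minimal load
    int candidate_count = 1;
    while (candidate_count < static_cast<int>(stats.size()) &&
           stats[candidate_count].second == stats[0].second)
        candidate_count++;

    return stats[dsn_random32(0, candidate_count - 1)].first;
}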
Example #10
        /*search in cache*/
        rpc_address partition_resolver_simple::get_address(const partition_configuration& config) const
        {
            if (_app_is_stateful)
            {
                return config.primary;
            }
            else
            {
                if (config.last_drops.size() == 0)
                {
                    return rpc_address();
                }
                else
                {
                    return config.last_drops[dsn_random32(0, config.last_drops.size() - 1)];
                }
            }

            //if (is_write || semantic == read_semantic::ReadLastUpdate)
            //    return config.primary;

            //// readsnapshot or readoutdated, using random
            //else
            //{
            //    bool has_primary = false;
            //    int N = static_cast<int>(config.secondaries.size());
            //    if (!config.primary.is_invalid())
            //    {
            //        N++;
            //        has_primary = true;
            //    }

            //    if (0 == N) return config.primary;

            //    int r = random32(0, 1000) % N;
            //    if (has_primary && r == N - 1)
            //        return config.primary;
            //    else
            //        return config.secondaries[r];
            //}
        }
Example #11
void server_state::remove_meta_node(const ::dsn::rpc_address& node)
{
    zauto_write_lock l(_meta_lock);
    
    int i = -1;
    for (auto it = _meta_servers.begin(); it != _meta_servers.end(); it++)
    {
        i++;
        if (*it == node)
        {
            _meta_servers.erase(it);
            if (_meta_servers.size() == 0)
                _leader_index = -1;

            else if (i == _leader_index)
            {
                _leader_index = dsn_random32(0, (uint32_t)_meta_servers.size() - 1);
            }
            return;
        }
    }

    dassert (false, "cannot find node '%s:%hu' in server state", node.name(), node.port());
}
Example #12
void simple_load_balancer::run_lb(app_info& info, partition_configuration& pc, bool is_stateful)
{
    if (_state->freezed() && is_stateful)
        return;

    configuration_update_request proposal;
    proposal.config = pc;
    proposal.info = info;

    if (is_stateful)
    {
        if (pc.primary.is_invalid())
        {
            if (pc.secondaries.size() > 0)
            {
                if (s_lb_for_test)
                {
                    std::vector< ::dsn::rpc_address> tmp(pc.secondaries);
                    std::sort(tmp.begin(), tmp.end());
                    proposal.node = tmp[0];
                }
                else
                {
                    proposal.node = pc.secondaries[dsn_random32(0, static_cast<int>(pc.secondaries.size()) - 1)];
                }
                proposal.type = config_type::CT_UPGRADE_TO_PRIMARY;
            }

            else if (pc.last_drops.size() == 0)
            {
                proposal.node = find_minimal_load_machine(true);
                proposal.type = config_type::CT_ASSIGN_PRIMARY;
            }

            // DDD
            else
            {
                proposal.node = *pc.last_drops.rbegin();
                proposal.type = config_type::CT_ASSIGN_PRIMARY;

                derror("%s.%d.%d enters DDD state, we are waiting for its last primary node %s to come back ...",
                    info.app_type.c_str(),
                    pc.pid.get_app_id(),
                    pc.pid.get_partition_index(),
                    proposal.node.to_string()
                    );
            }

            if (proposal.node.is_invalid() == false)
            {
                send_proposal(proposal.node, proposal);
            }
        }

        else if (static_cast<int>(pc.secondaries.size()) + 1 < pc.max_replica_count)
        {
            proposal.type = config_type::CT_ADD_SECONDARY;
            proposal.node = find_minimal_load_machine(false);
            if (proposal.node.is_invalid() == false &&
                proposal.node != pc.primary &&
                std::find(pc.secondaries.begin(), pc.secondaries.end(), proposal.node) == pc.secondaries.end())
            {
                send_proposal(pc.primary, proposal);
            }
        }
        else
        {
            // it is healthy, nothing to do
        }
    }
    
    // stateless
    else
    {
        partition_configuration_stateless pcs(pc);

        if (static_cast<int>(pcs.worker_replicas().size()) < pc.max_replica_count)
        {
            proposal.type = config_type::CT_ADD_SECONDARY;
            proposal.node = find_minimal_load_machine(false);
            if (proposal.node.is_invalid() == false)
            {
                bool send = true;

                for (auto& s : pc.secondaries)
                {
                    // not on the same machine
                    if (s == proposal.node)
                    {
                        send = false;
                        break;
                    }
                }

                if (send)
                {
                    send_proposal(proposal.node, proposal);
                }
            }
        }
        else
        {
            // it is healthy, nothing to do
        }
    }
}
Example #13
        // run in replica thread
        void replica::init_checkpoint()
        {
            // only applicable to primary and secondary replicas
            if (status() != PS_PRIMARY && status() != PS_SECONDARY)
                return;

            // no need to checkpoint
            if (_app->is_delta_state_learning_supported())
                return;

            auto err = _app->checkpoint_async();
            if (err != ERR_NOT_IMPLEMENTED)
            {
                if (err == ERR_OK)
                {
                    ddebug("%s: checkpoint_async succeed, app_last_committed_decree=%" PRId64 ", app_last_durable_decree=%" PRId64,
                           name(), _app->last_committed_decree(), _app->last_durable_decree());
                }
                if (err != ERR_OK && err != ERR_WRONG_TIMING && err != ERR_NO_NEED_OPERATE && err != ERR_TRY_AGAIN)
                {
                    derror("%s: checkpoint_async failed, err = %s", name(), err.to_string());
                }
                return;
            }

            // private log must be enabled to make sure commits
            // are not lost during checkpointing
            dassert(nullptr != _private_log, "log_enable_private_prepare must be true for checkpointing");

            if (last_committed_decree() - last_durable_decree() < _options->checkpoint_min_decree_gap)
                return;

            // primary cannot checkpoint (TODO: test if async checkpoint is supported)
            // therefore we have to copy checkpoints from secondaries
            if (PS_PRIMARY == status())
            {
                // only one running instance
                if (nullptr == _primary_states.checkpoint_task)
                {
                    if (_primary_states.membership.secondaries.size() == 0)
                        return;

                    std::shared_ptr<replica_configuration> rc(new replica_configuration);
                    _primary_states.get_replica_config(PS_SECONDARY, *rc);

                    rpc_address sd = _primary_states.membership.secondaries
                        [dsn_random32(0, (int)_primary_states.membership.secondaries.size() - 1)];

                    _primary_states.checkpoint_task = rpc::call_typed(
                        sd,
                        RPC_REPLICA_COPY_LAST_CHECKPOINT,
                        rc,
                        this,
                        &replica::on_copy_checkpoint_ack,
                        gpid_to_hash(get_gpid())
                        );
                }
            }

            // secondary can start checkpointing in the long running thread pool
            else
            {
                dassert(PS_SECONDARY == status(), "");

                // only one running instance
                if (!_secondary_states.checkpoint_is_running)
                {
                    _secondary_states.checkpoint_is_running = true;
                    tasking::enqueue(
                        &_secondary_states.checkpoint_task,
                        LPC_CHECKPOINT_REPLICA,
                        this,
                        &replica::background_checkpoint,
                        gpid_to_hash(get_gpid())
                        );
                }                
            }
        }
Example #14
 uint32_t sim_network_provider::net_delay_milliseconds() const
 {
     return static_cast<uint32_t>(dsn_random32(_min_message_delay_microseconds, _max_message_delay_microseconds)) / 1000;
 }    
Example #15
void scheduler::schedule()
{
    _is_scheduling = true;

    check(); // check before schedule

    while (true)
    {
        // run ready workers whenever possible
        std::vector<int> ready_workers;
        for (auto& s : _threads)
        {
            if ((s->in_continuation && s->is_continuation_ready)
                || (!s->in_continuation && s->worker->queue()->approx_count() > 0)
                )
            {
                ready_workers.push_back(s->index);
            }
        }

        if (ready_workers.size() > 0)
        {
            int i = dsn_random32(0, (uint32_t)ready_workers.size() - 1);
            _threads[ready_workers[i]]->runnable.release();
            
            _is_scheduling = false;
            return;
        }

        // otherwise, run the timed tasks
        uint64_t ts = 0;
        auto events = _wheel.pop_next_events(ts);
        if (events)
        {
            {
                utils::auto_lock< ::dsn::utils::ex_lock> l(_lock);
                _time_ns = ts;
            }

            // randomize the order of the events before dispatching them
            std::random_shuffle(events->begin(), events->end(), [](int n) { return dsn_random32(0, n - 1); });

            for (auto e : *events)
            {
                if (e.app_task != nullptr)
                {
                    task* t = e.app_task;

                    {
                        node_scoper ns(t->node());
                        t->enqueue();
                    }

                    t->release_ref(); // added by previous t->enqueue from app
                }
                else
                {
                    dassert(e.system_task != nullptr, "app and system tasks cannot be both empty");
                    e.system_task();
                }
            }

            delete events;
            continue;
        }

        // wait a moment
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    _is_scheduling = false;
}
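Example #15 shuffles the pending timed events with std::random_shuffle driven by dsn_random32. std::random_shuffle was deprecated in C++14 and removed in C++17, so on newer toolchains the same randomization would need std::shuffle with a generator adapter. A sketch of such an adapter follows; the adapter type and the local dsn_random32 stand-in are illustrative, not part of rDSN.

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

// Stand-in so the sketch compiles on its own; the real dsn_random32 comes from rDSN.
static uint32_t dsn_random32(uint32_t min, uint32_t max)
{
    static thread_local std::mt19937 rng{std::random_device{}()};
    return std::uniform_int_distribution<uint32_t>(min, max)(rng);
}

// Adapter that satisfies the UniformRandomBitGenerator requirements so
// dsn_random32 can drive std::shuffle.
struct dsn_rng
{
    using result_type = uint32_t;
    static constexpr result_type min() { return 0; }
    static constexpr result_type max() { return 0xFFFFFFFFu; }
    result_type operator()() { return dsn_random32(0, 0xFFFFFFFFu); }
};

template <typename Event>
void shuffle_events(std::vector<Event>& events)
{
    dsn_rng gen;
    std::shuffle(events.begin(), events.end(), gen);
}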