static void corrupt_data(message_ex* request, const std::string& corrupt_type) { if (corrupt_type == "header") replace_value(request->buffers, dsn_random32(0, sizeof(message_header)-1)); else if (corrupt_type == "body") replace_value(request->buffers, dsn_random32(0, request->body_size()-1) + sizeof(message_header)); else if (corrupt_type == "random") replace_value(request->buffers, dsn_random32(0, request->body_size() + sizeof(message_header) - 1)); else { derror("try to inject an unknown data corrupt type: %s", corrupt_type.c_str()); } }
void load_balancer::run_lb(partition_configuration& pc) { if (_state->freezed()) return; configuration_update_request proposal; proposal.config = pc; if (pc.primary.is_invalid()) { if (pc.secondaries.size() > 0) { proposal.node = pc.secondaries[dsn_random32(0, static_cast<int>(pc.secondaries.size()) - 1)]; proposal.type = CT_UPGRADE_TO_PRIMARY; } else if (pc.last_drops.size() == 0) { proposal.node = find_minimal_load_machine(true); proposal.type = CT_ASSIGN_PRIMARY; } // DDD else { proposal.node = *pc.last_drops.rbegin(); proposal.type = CT_ASSIGN_PRIMARY; derror("%s.%d.%d enters DDD state, we are waiting for its last primary node %s to come back ...", pc.app_type.c_str(), pc.gpid.app_id, pc.gpid.pidx, proposal.node.to_string() ); } if (proposal.node.is_invalid() == false) { send_proposal(proposal.node, proposal); } } else if (static_cast<int>(pc.secondaries.size()) + 1 < pc.max_replica_count) { proposal.type = CT_ADD_SECONDARY; proposal.node = find_minimal_load_machine(false); if (proposal.node.is_invalid() == false && proposal.node != pc.primary && std::find(pc.secondaries.begin(), pc.secondaries.end(), proposal.node) == pc.secondaries.end()) { send_proposal(pc.primary, proposal); } } else { // it is healthy, nothing to do } }
static void fault_on_rpc_response_enqueue(rpc_response_task* resp) { fj_opt& opt = s_fj_opts[resp->spec().code]; if (resp->delay_milliseconds() == 0 && task_ext_for_fj::get(resp) == 0) { resp->set_delay(dsn_random32(opt.rpc_message_delay_ms_min, opt.rpc_message_delay_ms_max)); task_ext_for_fj::get(resp) = 1; // ensure only fd once } }
static void fault_on_rpc_request_enqueue(rpc_request_task* callee) { fj_opt& opt = s_fj_opts[callee->spec().code]; if (callee->delay_milliseconds() == 0 && task_ext_for_fj::get(callee) == 0) { callee->set_delay(dsn_random32(opt.rpc_message_delay_ms_min, opt.rpc_message_delay_ms_max)); task_ext_for_fj::get(callee) = 1; // ensure only fd once } }
void timer_task::enqueue() { // enable timer randomization to avoid lots of timers execution simultaneously if (delay_milliseconds() == 0 && spec().randomize_timer_delay_if_zero) { set_delay(dsn_random32(0, _interval_milliseconds)); } return task::enqueue(); }
timer_task::timer_task(dsn_task_code_t code, dsn_task_handler_t cb, void* param, uint32_t interval_milliseconds, int hash, service_node* node) : task(code, hash, node), _interval_milliseconds(interval_milliseconds), _cb(cb), _param(param) { dassert (TASK_TYPE_COMPUTE == spec().type, "this must be a computation type task, please use DEFINE_TASK_CODE to define the task code"); // enable timer randomization to avoid lots of timers execution simultaneously set_delay(dsn_random32(0, interval_milliseconds)); }
void sim_task_queue::enqueue(task* t) { dassert(0 == t->delay_milliseconds(), "delay time must be zero"); if (_tasks.size() > 0) { do { int random_pos = dsn_random32(0, 1000000); auto pr = _tasks.insert(std::map<uint32_t, task*>::value_type(random_pos, t)); if (pr.second) break; } while (true); } else { int random_pos = dsn_random32(0, 1000000); _tasks.insert(std::map<uint32_t, task*>::value_type(random_pos, t)); } // for scheduling if (task::get_current_worker()) { scheduler::instance().wait_schedule(true, true); } }
void server_state::switch_meta_primary() { zauto_write_lock l(_meta_lock); if (1 == _meta_servers.size()) return; while (true) { int r = dsn_random32(0, (uint32_t)_meta_servers.size() - 1); if (r != _leader_index) { _leader_index = r; return; } } }
::dsn::rpc_address load_balancer::find_minimal_load_machine(bool primaryOnly) { std::vector<std::pair< ::dsn::rpc_address, int>> stats; for (auto it = _state->_nodes.begin(); it != _state->_nodes.end(); it++) { if (it->second.is_alive) { stats.push_back(std::make_pair(it->first, static_cast<int>(primaryOnly ? it->second.primaries.size() : it->second.partitions.size()))); } } std::sort(stats.begin(), stats.end(), [](const std::pair< ::dsn::rpc_address, int>& l, const std::pair< ::dsn::rpc_address, int>& r) { return l.second < r.second || (l.second == r.second && l.first < r.first); }); if (stats.empty()) { return ::dsn::rpc_address(); } if (s_lb_for_test) { // alway use the first (minimal) one return stats[0].first; } int candidate_count = 1; int val = stats[0].second; for (size_t i = 1; i < stats.size(); i++) { if (stats[i].second > val) break; candidate_count++; } return stats[dsn_random32(0, candidate_count - 1)].first; }
/*search in cache*/ rpc_address partition_resolver_simple::get_address(const partition_configuration& config) const { if (_app_is_stateful) { return config.primary; } else { if (config.last_drops.size() == 0) { return rpc_address(); } else { return config.last_drops[dsn_random32(0, config.last_drops.size() - 1)]; } } //if (is_write || semantic == read_semantic::ReadLastUpdate) // return config.primary; //// readsnapshot or readoutdated, using random //else //{ // bool has_primary = false; // int N = static_cast<int>(config.secondaries.size()); // if (!config.primary.is_invalid()) // { // N++; // has_primary = true; // } // if (0 == N) return config.primary; // int r = random32(0, 1000) % N; // if (has_primary && r == N - 1) // return config.primary; // else // return config.secondaries[r]; //} }
void server_state::remove_meta_node(const ::dsn::rpc_address& node) { zauto_write_lock l(_meta_lock); int i = -1; for (auto it = _meta_servers.begin(); it != _meta_servers.end(); it++) { i++; if (*it == node) { _meta_servers.erase(it); if (_meta_servers.size() == 0) _leader_index = -1; else if (i == _leader_index) { _leader_index = dsn_random32(0, (uint32_t)_meta_servers.size() - 1); } return; } } dassert (false, "cannot find node '%s:%hu' in server state", node.name(), node.port()); }
void simple_load_balancer::run_lb(app_info& info, partition_configuration& pc, bool is_stateful) { if (_state->freezed() && is_stateful) return; configuration_update_request proposal; proposal.config = pc; proposal.info = info; if (is_stateful) { if (pc.primary.is_invalid()) { if (pc.secondaries.size() > 0) { if (s_lb_for_test) { std::vector< ::dsn::rpc_address> tmp(pc.secondaries); std::sort(tmp.begin(), tmp.end()); proposal.node = tmp[0]; } else { proposal.node = pc.secondaries[dsn_random32(0, static_cast<int>(pc.secondaries.size()) - 1)]; } proposal.type = config_type::CT_UPGRADE_TO_PRIMARY; } else if (pc.last_drops.size() == 0) { proposal.node = find_minimal_load_machine(true); proposal.type = config_type::CT_ASSIGN_PRIMARY; } // DDD else { proposal.node = *pc.last_drops.rbegin(); proposal.type = config_type::CT_ASSIGN_PRIMARY; derror("%s.%d.%d enters DDD state, we are waiting for its last primary node %s to come back ...", info.app_type.c_str(), pc.pid.get_app_id(), pc.pid.get_partition_index(), proposal.node.to_string() ); } if (proposal.node.is_invalid() == false) { send_proposal(proposal.node, proposal); } } else if (static_cast<int>(pc.secondaries.size()) + 1 < pc.max_replica_count) { proposal.type = config_type::CT_ADD_SECONDARY; proposal.node = find_minimal_load_machine(false); if (proposal.node.is_invalid() == false && proposal.node != pc.primary && std::find(pc.secondaries.begin(), pc.secondaries.end(), proposal.node) == pc.secondaries.end()) { send_proposal(pc.primary, proposal); } } else { // it is healthy, nothing to do } } // stateless else { partition_configuration_stateless pcs(pc); if (static_cast<int>(pcs.worker_replicas().size()) < pc.max_replica_count) { proposal.type = config_type::CT_ADD_SECONDARY; proposal.node = find_minimal_load_machine(false); if (proposal.node.is_invalid() == false) { bool send = true; for (auto& s : pc.secondaries) { // not on the same machine if (s == proposal.node) { send = false; break; } } if (send) { send_proposal(proposal.node, proposal); } } } else { // it is healthy, nothing to do } } }
// run in replica thread void replica::init_checkpoint() { // only applicable to primary and secondary replicas if (status() != PS_PRIMARY && status() != PS_SECONDARY) return; // no need to checkpoint if (_app->is_delta_state_learning_supported()) return; auto err = _app->checkpoint_async(); if (err != ERR_NOT_IMPLEMENTED) { if (err == ERR_OK) { ddebug("%s: checkpoint_async succeed, app_last_committed_decree=%" PRId64 ", app_last_durable_decree=%" PRId64, name(), _app->last_committed_decree(), _app->last_durable_decree()); } if (err != ERR_OK && err != ERR_WRONG_TIMING && err != ERR_NO_NEED_OPERATE && err != ERR_TRY_AGAIN) { derror("%s: checkpoint_async failed, err = %s", name(), err.to_string()); } return; } // private log must be enabled to make sure commits // are not lost during checkpinting dassert(nullptr != _private_log, "log_enable_private_prepare must be true for checkpointing"); if (last_committed_decree() - last_durable_decree() < _options->checkpoint_min_decree_gap) return; // primary cannot checkpoint (TODO: test if async checkpoint is supported) // therefore we have to copy checkpoints from secondaries if (PS_PRIMARY == status()) { // only one running instance if (nullptr == _primary_states.checkpoint_task) { if (_primary_states.membership.secondaries.size() == 0) return; std::shared_ptr<replica_configuration> rc(new replica_configuration); _primary_states.get_replica_config(PS_SECONDARY, *rc); rpc_address sd = _primary_states.membership.secondaries [dsn_random32(0, (int)_primary_states.membership.secondaries.size() - 1)]; _primary_states.checkpoint_task = rpc::call_typed( sd, RPC_REPLICA_COPY_LAST_CHECKPOINT, rc, this, &replica::on_copy_checkpoint_ack, gpid_to_hash(get_gpid()) ); } } // secondary can start checkpint in the long running thread pool else { dassert(PS_SECONDARY == status(), ""); // only one running instance if (!_secondary_states.checkpoint_is_running) { _secondary_states.checkpoint_is_running = true; tasking::enqueue( &_secondary_states.checkpoint_task, LPC_CHECKPOINT_REPLICA, this, &replica::background_checkpoint, gpid_to_hash(get_gpid()) ); } } }
uint32_t sim_network_provider::net_delay_milliseconds() const { return static_cast<uint32_t>(dsn_random32(_min_message_delay_microseconds, _max_message_delay_microseconds)) / 1000; }
void scheduler::schedule() { _is_scheduling = true; check(); // check before schedule while (true) { // run ready workers whenever possible std::vector<int> ready_workers; for (auto& s : _threads) { if ((s->in_continuation && s->is_continuation_ready) || (!s->in_continuation && s->worker->queue()->approx_count() > 0) ) { ready_workers.push_back(s->index); } } if (ready_workers.size() > 0) { int i = dsn_random32(0, (uint32_t)ready_workers.size() - 1); _threads[ready_workers[i]]->runnable.release(); _is_scheduling = false; return; } // otherwise, run the timed tasks uint64_t ts = 0; auto events = _wheel.pop_next_events(ts); if (events) { { utils::auto_lock< ::dsn::utils::ex_lock> l(_lock); _time_ns = ts; } // randomize the events, and see std::random_shuffle(events->begin(), events->end(), [](int n) { return dsn_random32(0, n - 1); }); for (auto e : *events) { if (e.app_task != nullptr) { task* t = e.app_task; { node_scoper ns(t->node()); t->enqueue(); } t->release_ref(); // added by previous t->enqueue from app } else { dassert(e.system_task != nullptr, "app and system tasks cannot be both empty"); e.system_task(); } } delete events; continue; } // wait a moment std::this_thread::sleep_for(std::chrono::milliseconds(100)); } _is_scheduling = false; }