// Attaches the test_injector callbacks to the hook points of every task code
// in the range [0, dsn_task_code_max()], except TASK_CODE_INVALID.
//
// svc_spec is not used by this function.
void test_injector::install(service_spec& svc_spec)
{
    // name under which the put_back hooks are registered
    const char* const hook_owner = "test_injector";

    for (int code = 0; code <= dsn_task_code_max(); ++code)
    {
        // the invalid task code gets no hooks
        if (code == TASK_CODE_INVALID)
            continue;

        task_spec* spec = task_spec::get(code);

        // task life-cycle hooks
        spec->on_task_enqueue.put_back(inject_on_task_enqueue, hook_owner);
        spec->on_task_begin.put_back(inject_on_task_begin, hook_owner);
        spec->on_task_end.put_back(inject_on_task_end, hook_owner);
        spec->on_task_cancelled.put_back(inject_on_task_cancelled, hook_owner);

        // wait / cancel hooks
        spec->on_task_wait_pre.put_back(inject_on_task_wait_pre, hook_owner);
        spec->on_task_wait_post.put_back(inject_on_task_wait_post, hook_owner);
        spec->on_task_cancel_post.put_back(inject_on_task_cancel_post, hook_owner);

        // aio hooks
        spec->on_aio_call.put_native(inject_on_aio_call);
        spec->on_aio_enqueue.put_back(inject_on_aio_enqueue, hook_owner);

        // rpc hooks
        spec->on_rpc_call.put_native(inject_on_rpc_call);
        spec->on_rpc_request_enqueue.put_native(inject_on_rpc_request_enqueue);
        spec->on_rpc_reply.put_native(inject_on_rpc_reply);
        spec->on_rpc_response_enqueue.put_native(inject_on_rpc_response_enqueue);
    }

    //ddebug("=== test_injector installed");
}
void register_command_profiler() { std::stringstream tmpss; tmpss << "NAME:" << std::endl; tmpss << " profiler - collect performance data" << std::endl; tmpss << "SYNOPSIS:" << std::endl; tmpss << " show how tasks call each other with what frequency:" << std::endl; tmpss << " p|P|profile|Profile task|t dependency|dep matrix" << std::endl; tmpss << " show how tasks call each oether with list format sort by caller/callee:" << std::endl; tmpss << " p|P|profile|Profile task|t dependency|dep list [$task] [caller(default)|callee]" << std::endl; tmpss << " show performance data for specific tasks:" << std::endl; tmpss << " p|P|profile|Profile task|t info [all|$task]:" << std::endl; tmpss << " show the top N task kinds sort by counter_name:" << std::endl; tmpss << " p|P|profile|Profile task|t top $N $counter_name [$percentile]:" << std::endl; tmpss << "ARGUMENTS:" << std::endl; tmpss << " $percentile : e.g, 50 for latency at 50 percentile, 50(default)|90|95|99|999:" << std::endl; tmpss << " $counter_name :" << std::endl; for (int i = 0; i < PREF_COUNTER_COUNT; i++) { tmpss << " " << std::setw(data_width) << counter_info_ptr[i]->title << " :"; for (size_t j = 0; j < counter_info_ptr[i]->keys.size(); j++) { tmpss << " " << counter_info_ptr[i]->keys[j]; } tmpss << std::endl; } tmpss << " $task : all task code, such as" << std::endl; for (int i = 1; i < dsn_task_code_max() && i <= 10; i++) { tmpss << " " << dsn_task_code_to_string(i) << std::endl; } register_command({ "p", "P", "profile", "Profile"}, "profile|Profile|p|P - performance profiling", tmpss.str().c_str(), profiler_output_handler); }
// Loads the fault-injection options of every task code and attaches the
// fault_injector callbacks to the task codes that have injection enabled.
//
// Options are read from the "task.<task code name>" config section, with the
// options read from "task..default" passed along to read_config
// (presumably as fallback values -- confirm against read_config).
// s_fj_opts is allocated here with one slot per task code.
void fault_injector::install(service_spec& spec)
{
    // name under which the put_back hooks are registered
    const char* const hook_owner = "fault_injector";

    task_ext_for_fj::register_ext();

    s_fj_opts = new fj_opt[dsn_task_code_max() + 1];

    fj_opt default_opt;
    read_config("task..default", default_opt);

    for (int code = 0; code <= dsn_task_code_max(); ++code)
    {
        if (code == TASK_CODE_INVALID)
            continue;

        std::string section_name("task.");
        section_name += std::string(dsn_task_code_to_string(code));

        // NOTE: this local deliberately keeps the original name; it hides the
        // service_spec parameter for the rest of the loop body.
        task_spec* spec = task_spec::get(code);
        dassert (spec != nullptr, "task_spec cannot be null");

        fj_opt& lopt = s_fj_opts[code];
        read_config(section_name.c_str(), lopt, &default_opt);

        // nothing to hook when injection is disabled for this task code
        if (!lopt.fault_injection_enabled)
            continue;

        // Hooks that are currently not installed:
        //spec->on_task_enqueue.put_back(fault_on_task_enqueue, "fault_injector");
        //spec->on_task_begin.put_back(fault_on_task_begin, "fault_injector");
        //spec->on_task_cancelled.put_back(fault_on_task_cancelled, "fault_injector");
        //spec->on_task_wait_pre.put_back(fault_on_task_wait_pre, "fault_injector");
        //spec->on_task_wait_post.put_back(fault_on_task_wait_post, "fault_injector");
        //spec->on_task_cancel_post.put_back(fault_on_task_cancel_post, "fault_injector");

        spec->on_task_end.put_back(fault_on_task_end, hook_owner);

        spec->on_aio_call.put_native(fault_on_aio_call);
        spec->on_aio_enqueue.put_back(fault_on_aio_enqueue, hook_owner);

        spec->on_rpc_call.put_native(fault_on_rpc_call);
        spec->on_rpc_request_enqueue.put_back(fault_on_rpc_request_enqueue, hook_owner);
        spec->on_rpc_reply.put_native(fault_on_rpc_reply);
        spec->on_rpc_response_enqueue.put_back(fault_on_rpc_response_enqueue, hook_owner);
    }

    if (default_opt.node_crash_minutes_max > 0)
    {
        // TODO:
    }
}
void register_command_profiler() { std::stringstream textp, textpjs, textpd, textarg; textp << "NAME:" << std::endl; textp << " profiler - collect performance data" << std::endl; textp << "SYNOPSIS:" << std::endl; textp << " show how tasks call each other with what frequency:" << std::endl; textp << " p|P|profile|Profile dependency|dep matrix" << std::endl; textp << " show how tasks call each oether with list format sort by caller/callee:" << std::endl; textp << " p|P|profile|Profile dependency|dep list [$task] [caller(default)|callee]" << std::endl; textp << " show performance data for specific tasks:" << std::endl; textp << " p|P|profile|Profile info [all|$task]" << std::endl; textp << " show the top N task kinds sort by counter_name:" << std::endl; textp << " p|P|profile|Profile top $N $counter_name [$percentile]" << std::endl; textpjs << "NAME:" << std::endl; textpjs << " profile javascript - collect performance data and show as chart by javascript" << std::endl; textpjs << "SYNOPSIS:" << std::endl; textpjs << " pjs|PJS|profilejavascript|ProfileJavaScript $chart_type task|t $task_name [$percentile] [$counter_name $counter_name ...]" << std::endl; textpjs << " pjs|PJS|profilejavascript|ProfileJavaScript $chart_type counter|c $counter_name [$percentile] $task_name $task_name ..." << std::endl; textpjs << " pjs|PJS|profilejavascript|ProfileJavaScript top $N $counter_name [$percentile]" << std::endl; textpd << "NAME:" << std::endl; textpd << " profiler data - get appointed data, using by pjs" << std::endl; textpd << "SYNOPSIS:" << std::endl; textpd << " pd|PD|profiledata|ProfileData $task_name:$counter_name:$percentile ..." 
<< std::endl; textpd << " pd|PD|profiledata|ProfileData $task_name:AllPercentile:$percentile" << std::endl; textarg << "ARGUMENTS:" << std::endl; textarg << " $percentile : e.g, 50 for latency at 50 percentile, 50(default)|90|95|99|999" << std::endl; textarg << " $counter_name :" << std::endl; for (int i = 0; i < PREF_COUNTER_COUNT; i++) { textarg << " " << std::setw(data_width) << counter_info_ptr[i]->title << " :"; for (size_t j = 0; j < counter_info_ptr[i]->keys.size(); j++) { textarg << " " << counter_info_ptr[i]->keys[j]; } textarg << std::endl; } textarg << " $task : all task code, such as" << std::endl; for (int i = 1; i < dsn_task_code_max() && i <= 10; i++) { textarg << " " << dsn_task_code_to_string(i) << std::endl; } textp << textarg.str(); textpjs << textarg.str(); textpd << textarg.str(); register_command({ "p", "P", "profile", "Profile" }, "profile|Profile|p|P - performance profiling", textp.str().c_str(), profiler_output_handler); //register_command({ "pjs", "PJS", "profilejavascript", "ProfileJavaScript", nullptr }, "pjs|PJS|profilejavascript|ProfileJavaScript - profile and show by javascript", textpjs.str().c_str(), profiler_js_handler); register_command({ "pd", "PD", "profiledata", "ProfileData" }, "profiler data - get appointed data, using by pjs", textpd.str().c_str(), profiler_data_handler); }
std::string query_data_handler(const std::vector<std::string>& args) { int task_id; perf_counter_ptr_type counter_type; counter_percentile_type percentile_type; std::stringstream ss; for (int i = 0; i <= dsn_task_code_max(); ++i) { task_id = i; if ((i == TASK_CODE_INVALID) || (s_spec_profilers[task_id].is_profile == false)) continue; for (int j = 0; j < COUNTER_PERCENTILE_COUNT; ++j) { percentile_type = static_cast<counter_percentile_type>(j); ss << "<tr>" << "<td>" << dsn_task_code_to_string(task_id) << "</td>"; ss << "<td>" << percentail_counter_string[percentile_type] << "</td>"; for (int k = 0; k < PREF_COUNTER_COUNT; k++) { counter_type = static_cast<perf_counter_ptr_type>(k); if (s_spec_profilers[task_id].ptr[counter_type] == NULL) { ss << "<td></td>"; } else { if (counter_info_ptr[counter_type]->type == COUNTER_TYPE_NUMBER_PERCENTILES) { ss << "<td>" << s_spec_profilers[task_id].ptr[counter_type]->get_percentile(percentile_type) << "</td>"; } else { auto res = s_spec_profilers[task_id].ptr[counter_type]->get_value(); if (std::isnan(res)) ss << "<td>" << "NAN" << "</td>"; else ss << "<td>" << res << "</td>"; } } } ss << "</tr>"; } } return ss.str(); }
// Sets the initial scheduler state (time 0, not running) and registers the
// "simulation.*" callbacks: on task worker creation/start, and on the wait
// hooks of every task code in [0, dsn_task_code_max()]. Finally registers the
// extensions for tasks and task workers with their deletors.
scheduler::scheduler()
{
    _time_ns = 0;
    _running = false;

    // worker-level hooks
    task_worker::on_create.put_back(on_task_worker_create, "simulation.on_task_worker_create");
    task_worker::on_start.put_back(on_task_worker_start, "simulation.on_task_worker_start");

    // task-level wait hooks, for every task code (none is skipped)
    for (int code = 0; code <= dsn_task_code_max(); ++code)
    {
        task_spec* ts = task_spec::get(code);
        ts->on_task_wait_pre.put_back(scheduler::on_task_wait, "simulation.on_task_wait");
        ts->on_task_wait_notified.put_back(scheduler::on_task_wait_notified, "simulation.on_task_wait_notified");
    }

    task_ext::register_ext(task_state_ext::deletor);
    task_worker_ext::register_ext(sim_worker_state::deletor);
}
bool run(const char* config_file, const char* config_arguments, bool sleep_after_init, std::string& app_list) { dsn_core_init(); ::dsn::task::set_tls_dsn_context(nullptr, nullptr, nullptr); dsn_all.engine_ready = false; dsn_all.config_completed = false; dsn_all.tool = nullptr; dsn_all.engine = &::dsn::service_engine::instance(); dsn_all.config.reset(new ::dsn::configuration()); dsn_all.memory = nullptr; dsn_all.magic = 0xdeadbeef; if (!dsn_all.config->load(config_file, config_arguments)) { printf("Fail to load config file %s\n", config_file); return false; } // pause when necessary if (dsn_all.config->get_value<bool>("core", "pause_on_start", false, "whether to pause at startup time for easier debugging")) { #if defined(_WIN32) printf("\nPause for debugging (pid = %d)...\n", static_cast<int>(::GetCurrentProcessId())); #else printf("\nPause for debugging (pid = %d)...\n", static_cast<int>(getpid())); #endif getchar(); } // regiser external app roles by loading all shared libraries // so all code and app factories are automatically registered dsn::service_spec::load_app_shared_libraries(dsn_all.config); for (int i = 0; i <= dsn_task_code_max(); i++) { dsn_all.task_specs.push_back(::dsn::task_spec::get(i)); } // initialize global specification from config file ::dsn::service_spec spec; spec.config = dsn_all.config; if (!spec.init()) { printf("error in config file %s, exit ...\n", config_file); return false; } dsn_all.config_completed = true; // setup data dir auto& data_dir = spec.data_dir; dassert(!dsn::utils::filesystem::file_exists(data_dir), "%s should not be a file.", data_dir.c_str()); if (!dsn::utils::filesystem::directory_exists(data_dir.c_str())) { if (!dsn::utils::filesystem::create_directory(data_dir)) { dassert(false, "Fail to create %s.", data_dir.c_str()); } } std::string cdir; if (!dsn::utils::filesystem::get_absolute_path(data_dir.c_str(), cdir)) { dassert(false, "Fail to get absolute path from %s.", data_dir.c_str()); } spec.data_dir = cdir; // setup 
coredump dir spec.dir_coredump = ::dsn::utils::filesystem::path_combine(cdir, "coredumps"); dsn::utils::filesystem::create_directory(spec.dir_coredump); ::dsn::utils::coredump::init(spec.dir_coredump.c_str()); // setup log dir spec.dir_log = ::dsn::utils::filesystem::path_combine(cdir, "logs"); dsn::utils::filesystem::create_directory(spec.dir_log); // init tools dsn_all.tool = ::dsn::utils::factory_store< ::dsn::tools::tool_app>::create(spec.tool.c_str(), ::dsn::PROVIDER_TYPE_MAIN, spec.tool.c_str()); dsn_all.tool->install(spec); // init app specs if (!spec.init_app_specs()) { printf("error in config file %s, exit ...\n", config_file); return false; } // init tool memory dsn_all.memory = ::dsn::utils::factory_store< ::dsn::memory_provider>::create( spec.tools_memory_factory_name.c_str(), ::dsn::PROVIDER_TYPE_MAIN); // prepare minimum necessary ::dsn::service_engine::fast_instance().init_before_toollets(spec); // init logging dsn_log_init(); // init toollets for (auto it = spec.toollets.begin(); it != spec.toollets.end(); ++it) { auto tlet = dsn::tools::internal_use_only::get_toollet(it->c_str(), ::dsn::PROVIDER_TYPE_MAIN); dassert(tlet, "toolet not found"); tlet->install(spec); } // init provider specific system inits dsn::tools::sys_init_before_app_created.execute(::dsn::service_engine::fast_instance().spec().config); // TODO: register sys_exit execution // init runtime ::dsn::service_engine::fast_instance().init_after_toollets(); dsn_all.engine_ready = true; // split app_name and app_index std::list<std::string> applistkvs; ::dsn::utils::split_args(app_list.c_str(), applistkvs, ';'); // init apps for (auto& sp : spec.app_specs) { if (!sp.run) continue; bool create_it = false; if (app_list == "") // create all apps { create_it = true; } else { for (auto &kv : applistkvs) { std::list<std::string> argskvs; ::dsn::utils::split_args(kv.c_str(), argskvs, '@'); if (std::string("apps.") + argskvs.front() == sp.config_section) { if (argskvs.size() < 2) create_it = true; 
else create_it = (std::stoi(argskvs.back()) == sp.index); break; } } } if (create_it) { ::dsn::service_engine::fast_instance().start_node(sp); } } if (::dsn::service_engine::fast_instance().get_all_nodes().size() == 0) { printf("no app are created, usually because \n" "app_name is not specified correctly, should be 'xxx' in [apps.xxx]\n" "or app_index (1-based) is greater than specified count in config file\n" ); exit(1); } // start cli if necessary if (dsn_all.config->get_value<bool>("core", "cli_local", true, "whether to enable local command line interface (cli)")) { ::dsn::command_manager::instance().start_local_cli(); } if (dsn_all.config->get_value<bool>("core", "cli_remote", true, "whether to enable remote command line interface (using dsn.cli)")) { ::dsn::command_manager::instance().start_remote_cli(); } // register local cli commands ::dsn::register_command("config-dump", "config-dump - dump configuration", "config-dump [to-this-config-file]", [](const std::vector<std::string>& args) { std::ostringstream oss; std::ofstream off; std::ostream* os = &oss; if (args.size() > 0) { off.open(args[0]); os = &off; oss << "config dump to file " << args[0] << std::endl; } dsn_all.config->dump(*os); return oss.str(); }); // invoke customized init after apps are created dsn::tools::sys_init_after_app_created.execute(::dsn::service_engine::fast_instance().spec().config); // start the tool dsn_all.tool->run(); // if (sleep_after_init) { while (true) { std::this_thread::sleep_for(std::chrono::hours(1)); } } // add this to allow mimic app call from this thread. memset((void*)&dsn::tls_dsn, 0, sizeof(dsn::tls_dsn)); return true; }
// Sets up the per-task-code profiler state and attaches the profiler hooks.
//
// For every task code except TASK_CODE_INVALID this
//   - reads "collect_call_count" and "is_profile" from the
//     "task.<task code name>" config section, using the values read from
//     "task..default" as defaults,
//   - allocates the call-count array (one slot per task code, starting at 0),
//   - obtains the perf counters, including the type specific ones for
//     rpc request, rpc response and aio tasks,
//   - registers the "profiler" hooks, only when is_profile is set.
// Finally the profiler CLI commands are registered.
//
// s_spec_profilers is allocated here with one entry per task code.
// The service_spec parameter is not used by this function.
void profiler::install(service_spec& spec)
{
    // computed once so that every array size and loop bound below agrees
    const int code_count = dsn_task_code_max() + 1;

    s_spec_profilers = new task_spec_profiler[code_count];
    task_ext_for_profiler::register_ext();
    message_ext_for_profiler::register_ext();
    dassert(sizeof(counter_info_ptr) / sizeof(counter_info*) == PREF_COUNTER_COUNT, "PREF COUNTER ERROR");

    auto profile = config()->get_value<bool>("task..default", "is_profile", false,
        "whether to profile this kind of task");
    auto collect_call_count = config()->get_value<bool>("task..default", "collect_call_count", true,
        "whether to collect how many time this kind of tasks invoke each of other kinds tasks");

    for (int i = 0; i < code_count; i++)
    {
        if (i == TASK_CODE_INVALID)
            continue;

        std::string name = std::string("task.") + std::string(dsn_task_code_to_string(i));

        // named tspec so that it does not hide the service_spec parameter
        task_spec* tspec = task_spec::get(i);
        dassert(tspec != nullptr, "task_spec cannot be null");

        s_spec_profilers[i].collect_call_count = config()->get_value<bool>(name.c_str(), "collect_call_count", collect_call_count,
            "whether to collect how many time this kind of tasks invoke each of other kinds tasks"
            );

        // new[] default-initializes std::atomic, which leaves the values
        // indeterminate before C++20; start every call count at zero
        s_spec_profilers[i].call_counts = new std::atomic<int64_t>[code_count];
        for (int j = 0; j < code_count; j++)
        {
            s_spec_profilers[i].call_counts[j].store(0);
        }

        // counters available for every task code
        s_spec_profilers[i].ptr[TASK_QUEUEING_TIME_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".queue(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true);
        s_spec_profilers[i].ptr[TASK_EXEC_TIME_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".exec(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true);
        s_spec_profilers[i].ptr[TASK_THROUGHPUT] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".qps")).c_str(), COUNTER_TYPE_RATE, true);
        s_spec_profilers[i].ptr[TASK_CANCELLED] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".cancelled#")).c_str(), COUNTER_TYPE_NUMBER, true);

        // counters that only exist for specific task types
        if (tspec->type == dsn_task_type_t::TASK_TYPE_RPC_REQUEST)
        {
            s_spec_profilers[i].ptr[RPC_SERVER_LATENCY_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency.server")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true);
        }
        else if (tspec->type == dsn_task_type_t::TASK_TYPE_RPC_RESPONSE)
        {
            s_spec_profilers[i].ptr[RPC_CLIENT_NON_TIMEOUT_LATENCY_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency.client(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true);
            s_spec_profilers[i].ptr[RPC_CLIENT_TIMEOUT_THROUGHPUT] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".timeout.qps")).c_str(), COUNTER_TYPE_RATE, true);
        }
        else if (tspec->type == dsn_task_type_t::TASK_TYPE_AIO)
        {
            s_spec_profilers[i].ptr[AIO_LATENCY_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true);
        }

        s_spec_profilers[i].is_profile = config()->get_value<bool>(name.c_str(), "is_profile", profile,
            "whether to profile this kind of task");

        // hooks are only attached to profiled task codes
        if (!s_spec_profilers[i].is_profile)
            continue;

        tspec->on_task_enqueue.put_back(profiler_on_task_enqueue, "profiler");
        tspec->on_task_begin.put_back(profiler_on_task_begin, "profiler");
        tspec->on_task_end.put_back(profiler_on_task_end, "profiler");
        tspec->on_task_cancelled.put_back(profiler_on_task_cancelled, "profiler");
        //tspec->on_task_wait_pre.put_back(profiler_on_task_wait_pre, "profiler");
        //tspec->on_task_wait_post.put_back(profiler_on_task_wait_post, "profiler");
        //tspec->on_task_cancel_post.put_back(profiler_on_task_cancel_post, "profiler");
        tspec->on_aio_call.put_back(profiler_on_aio_call, "profiler");
        tspec->on_aio_enqueue.put_back(profiler_on_aio_enqueue, "profiler");
        tspec->on_rpc_call.put_back(profiler_on_rpc_call, "profiler");
        tspec->on_rpc_request_enqueue.put_back(profiler_on_rpc_request_enqueue, "profiler");
        tspec->on_rpc_create_response.put_back(profiler_on_rpc_create_response, "profiler");
        tspec->on_rpc_reply.put_back(profiler_on_rpc_reply, "profiler");
        tspec->on_rpc_response_enqueue.put_back(profiler_on_rpc_response_enqueue, "profiler");
    }

    register_command_profiler();
}
// Attaches the tracer hooks to the traced task codes and registers the
// "tracer.find" command.
//
// A task code is traced when "is_trace" in its "task.<task code name>" config
// section is true; the default for that key is the "is_trace" value read from
// "task..default". For a traced task code every hook can be switched off
// individually through its "tracer::<hook name>" key (all default to true).
//
// The service_spec parameter is not used by this function.
void tracer::install(service_spec& spec)
{
    auto trace = dsn_config_get_value_bool("task..default", "is_trace", false,
        "whether to trace tasks by default");

    for (int i = 0; i <= dsn_task_code_max(); i++)
    {
        if (i == TASK_CODE_INVALID)
            continue;

        std::string section_name = std::string("task.") + std::string(dsn_task_code_to_string(i));

        // named tspec so that it does not hide the service_spec parameter
        task_spec* tspec = task_spec::get(i);
        dassert(tspec != nullptr, "task_spec cannot be null");

        if (!dsn_config_get_value_bool(section_name.c_str(), "is_trace", trace,
            "whether to trace this kind of task"))
            continue;

        // reads the per-hook switch of this task code (enabled by default)
        auto hook_enabled = [&section_name](const char* key, const char* dsptr) -> bool
        {
            return dsn_config_get_value_bool(section_name.c_str(), key, true, dsptr);
        };

        if (hook_enabled("tracer::on_task_enqueue", "whether to trace when a timer or async task is enqueued"))
            tspec->on_task_enqueue.put_back(tracer_on_task_enqueue, "tracer");

        if (hook_enabled("tracer::on_task_begin", "whether to trace when a task begins"))
            tspec->on_task_begin.put_back(tracer_on_task_begin, "tracer");

        if (hook_enabled("tracer::on_task_end", "whether to trace when a task ends"))
            tspec->on_task_end.put_back(tracer_on_task_end, "tracer");

        if (hook_enabled("tracer::on_task_cancelled", "whether to trace when a task is cancelled"))
            tspec->on_task_cancelled.put_back(tracer_on_task_cancelled, "tracer");

        if (hook_enabled("tracer::on_task_wait_pre", "whether to trace when a task is to be wait"))
            tspec->on_task_wait_pre.put_back(tracer_on_task_wait_pre, "tracer");

        if (hook_enabled("tracer::on_task_wait_post", "whether to trace when a task is wait post"))
            tspec->on_task_wait_post.put_back(tracer_on_task_wait_post, "tracer");

        if (hook_enabled("tracer::on_task_cancel_post", "whether to trace when a task is cancel post"))
            tspec->on_task_cancel_post.put_back(tracer_on_task_cancel_post, "tracer");

        if (hook_enabled("tracer::on_aio_call", "whether to trace when an aio task is called"))
            tspec->on_aio_call.put_back(tracer_on_aio_call, "tracer");

        if (hook_enabled("tracer::on_aio_enqueue", "whether to trace when an aio task is enqueued"))
            tspec->on_aio_enqueue.put_back(tracer_on_aio_enqueue, "tracer");

        if (hook_enabled("tracer::on_rpc_call", "whether to trace when a rpc is made"))
            tspec->on_rpc_call.put_back(tracer_on_rpc_call, "tracer");

        if (hook_enabled("tracer::on_rpc_request_enqueue", "whether to trace when a rpc request task is enqueued"))
            tspec->on_rpc_request_enqueue.put_back(tracer_on_rpc_request_enqueue, "tracer");

        if (hook_enabled("tracer::on_rpc_reply", "whether to trace when reply a rpc request"))
            tspec->on_rpc_reply.put_back(tracer_on_rpc_reply, "tracer");

        if (hook_enabled("tracer::on_rpc_response_enqueue", "whether to trace when a rpc response task is enqueued"))
            tspec->on_rpc_response_enqueue.put_back(tracer_on_rpc_response_enqueue, "tracer");
    }

    register_command({ "tracer.find" },
        "tracer.find - find related logs",
        "tracer.find forward|f|backward|b rpc|r|task|t trace_id|task_id(e.g., a023003920302390) log_file_name(log.xx.txt)",
        tracer_log_flow
        );
}