std::string profiler_data_handler(const std::vector<std::string>& args) { size_t k = args.size(); int task_id; perf_counter_ptr_type counter_type; counter_percentile_type percentile_type; std::stringstream ss; std::vector<std::string> val; if ((args.size() > 0) && (args[0] == "top")) { if (k < 4) { return ss.str(); } int num = atoi(args[1].c_str()); counter_type = find_counter_type(args[2]); percentile_type = find_percentail_type(args[3]); if ((num == 0) || (counter_type == PREF_COUNTER_INVALID) || (percentile_type == COUNTER_PERCENTILE_INVALID)) { return ss.str(); } profiler_data_top(ss, counter_type, percentile_type, num); return ss.str(); } for (int i = 0; i < k; i++) { utils::split_args(args[i].c_str(), val, ':'); task_id = find_task_id(val[0]); counter_type = find_counter_type(val[1]); percentile_type = find_percentail_type(val[2]); if ((task_id != TASK_CODE_INVALID) && (counter_type != PREF_COUNTER_INVALID) && (s_spec_profilers[task_id].ptr[counter_type] != NULL) && (s_spec_profilers[task_id].is_profile != false)) { ss << dsn_task_code_to_string(task_id) << ":" << counter_info_ptr[counter_type]->title << ":" << percentail_counter_string[percentile_type] << ":"; if (counter_info_ptr[counter_type]->type != COUNTER_TYPE_NUMBER_PERCENTILES) { ss << s_spec_profilers[task_id].ptr[counter_type]->get_value() << " "; } else if (percentile_type != COUNTER_PERCENTILE_INVALID) { ss << s_spec_profilers[task_id].ptr[counter_type]->get_percentile(percentile_type) << " "; } } else if ((task_id != TASK_CODE_INVALID) && (val[1] == "AllPercentile") && (s_spec_profilers[task_id].is_profile != false)) { for (int j = 0; j < PREF_COUNTER_COUNT; j++) { if ((s_spec_profilers[task_id].ptr[j] != NULL) && (counter_info_ptr[j]->type == COUNTER_TYPE_NUMBER_PERCENTILES)) { ss << dsn_task_code_to_string(i) << ":" << counter_info_ptr[j]->title << ":" << percentail_counter_string[percentile_type] << ":" << s_spec_profilers[task_id].ptr[j]->get_percentile(percentile_type) << " "; } } } } return ss.str(); }
message_ex* message_ex::create_request(dsn_task_code_t rpc_code, int timeout_milliseconds, int hash) { message_ex* msg = new message_ex(); msg->_is_read = false; msg->prepare_buffer_header(); // init header auto& hdr = *msg->header; memset(&hdr, 0, sizeof(hdr)); hdr.hdr_crc32 = hdr.body_crc32 = CRC_INVALID; if (DSN_INVALID_HASH != hash) hdr.client.hash = hash; if (0 == timeout_milliseconds) { hdr.client.timeout_ms = task_spec::get(rpc_code)->rpc_timeout_milliseconds; } else { hdr.client.timeout_ms = timeout_milliseconds; } strncpy(hdr.rpc_name, dsn_task_code_to_string(rpc_code), sizeof(hdr.rpc_name)); hdr.id = new_id(); msg->local_rpc_code = (uint16_t)rpc_code; return msg; }
replication_app_client_base::request_context* replication_app_client_base::create_read_context( uint64_t key_hash, dsn_task_code_t code, dsn_message_t request, ::dsn::task_ptr& callback, read_semantic_t read_semantic, decree snapshot_decree, // only used when ReadSnapshot int reply_hash ) { dsn_msg_options_t opts; dsn_msg_get_options(request, &opts); auto rc = new request_context; rc->request = request; rc->callback_task = callback; rc->is_read = true; rc->partition_index = -1; rc->key_hash = key_hash; rc->read_header.gpid.app_id = _app_id; rc->read_header.gpid.pidx = -1; rc->read_header.code = dsn_task_code_to_string(code); rc->read_header.semantic = read_semantic; rc->read_header.version_decree = snapshot_decree; rc->timeout_timer = nullptr; rc->timeout_ms = opts.timeout_ms; rc->timeout_ts_us = now_us() + opts.timeout_ms * 1000; rc->completed = false; size_t offset = dsn_msg_body_size(request); ::marshall(request, rc->read_header); rc->header_pos = (char*)dsn_msg_rw_ptr(request, offset); return rc; }
void register_command_profiler() { std::stringstream tmpss; tmpss << "NAME:" << std::endl; tmpss << " profiler - collect performance data" << std::endl; tmpss << "SYNOPSIS:" << std::endl; tmpss << " show how tasks call each other with what frequency:" << std::endl; tmpss << " p|P|profile|Profile task|t dependency|dep matrix" << std::endl; tmpss << " show how tasks call each oether with list format sort by caller/callee:" << std::endl; tmpss << " p|P|profile|Profile task|t dependency|dep list [$task] [caller(default)|callee]" << std::endl; tmpss << " show performance data for specific tasks:" << std::endl; tmpss << " p|P|profile|Profile task|t info [all|$task]:" << std::endl; tmpss << " show the top N task kinds sort by counter_name:" << std::endl; tmpss << " p|P|profile|Profile task|t top $N $counter_name [$percentile]:" << std::endl; tmpss << "ARGUMENTS:" << std::endl; tmpss << " $percentile : e.g, 50 for latency at 50 percentile, 50(default)|90|95|99|999:" << std::endl; tmpss << " $counter_name :" << std::endl; for (int i = 0; i < PREF_COUNTER_COUNT; i++) { tmpss << " " << std::setw(data_width) << counter_info_ptr[i]->title << " :"; for (size_t j = 0; j < counter_info_ptr[i]->keys.size(); j++) { tmpss << " " << counter_info_ptr[i]->keys[j]; } tmpss << std::endl; } tmpss << " $task : all task code, such as" << std::endl; for (int i = 1; i < dsn_task_code_max() && i <= 10; i++) { tmpss << " " << dsn_task_code_to_string(i) << std::endl; } register_command({ "p", "P", "profile", "Profile"}, "profile|Profile|p|P - performance profiling", tmpss.str().c_str(), profiler_output_handler); }
void register_command_profiler() { std::stringstream textp, textpjs, textpd, textarg; textp << "NAME:" << std::endl; textp << " profiler - collect performance data" << std::endl; textp << "SYNOPSIS:" << std::endl; textp << " show how tasks call each other with what frequency:" << std::endl; textp << " p|P|profile|Profile dependency|dep matrix" << std::endl; textp << " show how tasks call each oether with list format sort by caller/callee:" << std::endl; textp << " p|P|profile|Profile dependency|dep list [$task] [caller(default)|callee]" << std::endl; textp << " show performance data for specific tasks:" << std::endl; textp << " p|P|profile|Profile info [all|$task]" << std::endl; textp << " show the top N task kinds sort by counter_name:" << std::endl; textp << " p|P|profile|Profile top $N $counter_name [$percentile]" << std::endl; textpjs << "NAME:" << std::endl; textpjs << " profile javascript - collect performance data and show as chart by javascript" << std::endl; textpjs << "SYNOPSIS:" << std::endl; textpjs << " pjs|PJS|profilejavascript|ProfileJavaScript $chart_type task|t $task_name [$percentile] [$counter_name $counter_name ...]" << std::endl; textpjs << " pjs|PJS|profilejavascript|ProfileJavaScript $chart_type counter|c $counter_name [$percentile] $task_name $task_name ..." << std::endl; textpjs << " pjs|PJS|profilejavascript|ProfileJavaScript top $N $counter_name [$percentile]" << std::endl; textpd << "NAME:" << std::endl; textpd << " profiler data - get appointed data, using by pjs" << std::endl; textpd << "SYNOPSIS:" << std::endl; textpd << " pd|PD|profiledata|ProfileData $task_name:$counter_name:$percentile ..." << std::endl; textpd << " pd|PD|profiledata|ProfileData $task_name:AllPercentile:$percentile" << std::endl; textarg << "ARGUMENTS:" << std::endl; textarg << " $percentile : e.g, 50 for latency at 50 percentile, 50(default)|90|95|99|999" << std::endl; textarg << " $counter_name :" << std::endl; for (int i = 0; i < PREF_COUNTER_COUNT; i++) { textarg << " " << std::setw(data_width) << counter_info_ptr[i]->title << " :"; for (size_t j = 0; j < counter_info_ptr[i]->keys.size(); j++) { textarg << " " << counter_info_ptr[i]->keys[j]; } textarg << std::endl; } textarg << " $task : all task code, such as" << std::endl; for (int i = 1; i < dsn_task_code_max() && i <= 10; i++) { textarg << " " << dsn_task_code_to_string(i) << std::endl; } textp << textarg.str(); textpjs << textarg.str(); textpd << textarg.str(); register_command({ "p", "P", "profile", "Profile" }, "profile|Profile|p|P - performance profiling", textp.str().c_str(), profiler_output_handler); //register_command({ "pjs", "PJS", "profilejavascript", "ProfileJavaScript", nullptr }, "pjs|PJS|profilejavascript|ProfileJavaScript - profile and show by javascript", textpjs.str().c_str(), profiler_js_handler); register_command({ "pd", "PD", "profiledata", "ProfileData" }, "profiler data - get appointed data, using by pjs", textpd.str().c_str(), profiler_data_handler); }
error_code replication_app_base::write_internal(mutation_ptr& mu) { dassert (mu->data.header.decree == last_committed_decree() + 1, ""); dassert(mu->client_requests.size() == mu->data.updates.size() && mu->client_requests.size() > 0, "data inconsistency in mutation"); int count = static_cast<int>(mu->client_requests.size()); _batch_state = (count == 1 ? BS_NOT_BATCH : BS_BATCH); for (int i = 0; i < count; i++) { if (_batch_state == BS_BATCH && i + 1 == count) { _batch_state = BS_BATCH_LAST; } auto& r = mu->client_requests[i]; if (r.code != RPC_REPLICATION_WRITE_EMPTY) { dinfo("%s: mutation %s dispatch rpc call: %s", _replica->name(), mu->name(), dsn_task_code_to_string(r.code)); binary_reader reader(mu->data.updates[i]); dsn_message_t resp = (r.req ? dsn_msg_create_response(r.req) : nullptr); uint64_t now = dsn_now_ns(); dispatch_rpc_call(r.code, reader, resp); now = dsn_now_ns() - now; _app_commit_latency.set(now); } else { // empty mutation write } if (_physical_error != 0) { derror("%s: physical error %d occurs in replication local app %s", _replica->name(), _physical_error, data_dir().c_str()); return ERR_LOCAL_APP_FAILURE; } } ++_last_committed_decree; _replica->update_commit_statistics(count); _app_commit_throughput.add((uint64_t)count); _app_commit_decree.increment(); return ERR_OK; }
std::string query_data_handler(const std::vector<std::string>& args) { int task_id; perf_counter_ptr_type counter_type; counter_percentile_type percentile_type; std::stringstream ss; for (int i = 0; i <= dsn_task_code_max(); ++i) { task_id = i; if ((i == TASK_CODE_INVALID) || (s_spec_profilers[task_id].is_profile == false)) continue; for (int j = 0; j < COUNTER_PERCENTILE_COUNT; ++j) { percentile_type = static_cast<counter_percentile_type>(j); ss << "<tr>" << "<td>" << dsn_task_code_to_string(task_id) << "</td>"; ss << "<td>" << percentail_counter_string[percentile_type] << "</td>"; for (int k = 0; k < PREF_COUNTER_COUNT; k++) { counter_type = static_cast<perf_counter_ptr_type>(k); if (s_spec_profilers[task_id].ptr[counter_type] == NULL) { ss << "<td></td>"; } else { if (counter_info_ptr[counter_type]->type == COUNTER_TYPE_NUMBER_PERCENTILES) { ss << "<td>" << s_spec_profilers[task_id].ptr[counter_type]->get_percentile(percentile_type) << "</td>"; } else { auto res = s_spec_profilers[task_id].ptr[counter_type]->get_value(); if (std::isnan(res)) ss << "<td>" << "NAN" << "</td>"; else ss << "<td>" << res << "</td>"; } } } ss << "</tr>"; } } return ss.str(); }
void fault_injector::install(service_spec& spec) { task_ext_for_fj::register_ext(); s_fj_opts = new fj_opt[dsn_task_code_max() + 1]; fj_opt default_opt; read_config("task..default", default_opt); for (int i = 0; i <= dsn_task_code_max(); i++) { if (i == TASK_CODE_INVALID) continue; std::string section_name = std::string("task.") + std::string(dsn_task_code_to_string(i)); task_spec* spec = task_spec::get(i); dassert (spec != nullptr, "task_spec cannot be null"); fj_opt& lopt = s_fj_opts[i]; read_config(section_name.c_str(), lopt, &default_opt); if (!lopt.fault_injection_enabled) continue; //spec->on_task_enqueue.put_back(fault_on_task_enqueue, "fault_injector"); //spec->on_task_begin.put_back(fault_on_task_begin, "fault_injector"); spec->on_task_end.put_back(fault_on_task_end, "fault_injector"); //spec->on_task_cancelled.put_back(fault_on_task_cancelled, "fault_injector"); //spec->on_task_wait_pre.put_back(fault_on_task_wait_pre, "fault_injector"); //spec->on_task_wait_post.put_back(fault_on_task_wait_post, "fault_injector"); //spec->on_task_cancel_post.put_back(fault_on_task_cancel_post, "fault_injector"); spec->on_aio_call.put_native(fault_on_aio_call); spec->on_aio_enqueue.put_back(fault_on_aio_enqueue, "fault_injector"); spec->on_rpc_call.put_native(fault_on_rpc_call); spec->on_rpc_request_enqueue.put_back(fault_on_rpc_request_enqueue, "fault_injector"); spec->on_rpc_reply.put_native(fault_on_rpc_reply); spec->on_rpc_response_enqueue.put_back(fault_on_rpc_response_enqueue, "fault_injector"); } if (default_opt.node_crash_minutes_max > 0) { // TODO: } }
void profiler::install(service_spec& spec) { s_spec_profilers = new task_spec_profiler[dsn_task_code_max() + 1]; task_ext_for_profiler::register_ext(); message_ext_for_profiler::register_ext(); dassert(sizeof(counter_info_ptr) / sizeof(counter_info*) == PREF_COUNTER_COUNT, "PREF COUNTER ERROR"); auto profile = config()->get_value<bool>("task..default", "is_profile", false, "whether to profile this kind of task"); auto collect_call_count = config()->get_value<bool>("task..default", "collect_call_count", true, "whether to collect how many time this kind of tasks invoke each of other kinds tasks"); for (int i = 0; i <= dsn_task_code_max(); i++) { if (i == TASK_CODE_INVALID) continue; std::string name = std::string("task.") + std::string(dsn_task_code_to_string(i)); task_spec* spec = task_spec::get(i); dassert(spec != nullptr, "task_spec cannot be null"); s_spec_profilers[i].collect_call_count = config()->get_value<bool>(name.c_str(), "collect_call_count", collect_call_count, "whether to collect how many time this kind of tasks invoke each of other kinds tasks" ); s_spec_profilers[i].call_counts = new std::atomic<int64_t>[dsn_task_code_max() + 1]; s_spec_profilers[i].ptr[TASK_QUEUEING_TIME_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".queue(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); s_spec_profilers[i].ptr[TASK_EXEC_TIME_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".exec(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); s_spec_profilers[i].ptr[TASK_THROUGHPUT] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".qps")).c_str(), COUNTER_TYPE_RATE, true); s_spec_profilers[i].ptr[TASK_CANCELLED] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".cancelled#")).c_str(), COUNTER_TYPE_NUMBER, true); if (spec->type == dsn_task_type_t::TASK_TYPE_RPC_REQUEST) { s_spec_profilers[i].ptr[RPC_SERVER_LATENCY_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency.server")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); } else if (spec->type == dsn_task_type_t::TASK_TYPE_RPC_RESPONSE) { s_spec_profilers[i].ptr[RPC_CLIENT_NON_TIMEOUT_LATENCY_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency.client(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); s_spec_profilers[i].ptr[RPC_CLIENT_TIMEOUT_THROUGHPUT] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".timeout.qps")).c_str(), COUNTER_TYPE_RATE, true); } else if (spec->type == dsn_task_type_t::TASK_TYPE_AIO) { s_spec_profilers[i].ptr[AIO_LATENCY_NS] = dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); } s_spec_profilers[i].is_profile = config()->get_value<bool>(name.c_str(), "is_profile", profile, "whether to profile this kind of task"); if (!s_spec_profilers[i].is_profile) continue; spec->on_task_enqueue.put_back(profiler_on_task_enqueue, "profiler"); spec->on_task_begin.put_back(profiler_on_task_begin, "profiler"); spec->on_task_end.put_back(profiler_on_task_end, "profiler"); spec->on_task_cancelled.put_back(profiler_on_task_cancelled, "profiler"); //spec->on_task_wait_pre.put_back(profiler_on_task_wait_pre, "profiler"); //spec->on_task_wait_post.put_back(profiler_on_task_wait_post, "profiler"); //spec->on_task_cancel_post.put_back(profiler_on_task_cancel_post, "profiler"); spec->on_aio_call.put_back(profiler_on_aio_call, "profiler"); spec->on_aio_enqueue.put_back(profiler_on_aio_enqueue, "profiler"); spec->on_rpc_call.put_back(profiler_on_rpc_call, "profiler"); spec->on_rpc_request_enqueue.put_back(profiler_on_rpc_request_enqueue, "profiler"); spec->on_rpc_create_response.put_back(profiler_on_rpc_create_response, "profiler"); spec->on_rpc_reply.put_back(profiler_on_rpc_reply, "profiler"); spec->on_rpc_response_enqueue.put_back(profiler_on_rpc_response_enqueue, "profiler"); } register_command_profiler(); }
void tracer::install(service_spec& spec) { auto trace = dsn_config_get_value_bool("task..default", "is_trace", false, "whether to trace tasks by default"); for (int i = 0; i <= dsn_task_code_max(); i++) { if (i == TASK_CODE_INVALID) continue; std::string section_name = std::string("task.") + std::string(dsn_task_code_to_string(i)); task_spec* spec = task_spec::get(i); dassert (spec != nullptr, "task_spec cannot be null"); if (!dsn_config_get_value_bool(section_name.c_str(), "is_trace", trace, "whether to trace this kind of task")) continue; if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_task_enqueue", true, "whether to trace when a timer or async task is enqueued")) spec->on_task_enqueue.put_back(tracer_on_task_enqueue, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_task_begin", true, "whether to trace when a task begins")) spec->on_task_begin.put_back(tracer_on_task_begin, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_task_end", true, "whether to trace when a task ends")) spec->on_task_end.put_back(tracer_on_task_end, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_task_cancelled", true, "whether to trace when a task is cancelled")) spec->on_task_cancelled.put_back(tracer_on_task_cancelled, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_task_wait_pre", true, "whether to trace when a task is to be wait")) spec->on_task_wait_pre.put_back(tracer_on_task_wait_pre, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_task_wait_post", true, "whether to trace when a task is wait post")) spec->on_task_wait_post.put_back(tracer_on_task_wait_post, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_task_cancel_post", true, "whether to trace when a task is cancel post")) spec->on_task_cancel_post.put_back(tracer_on_task_cancel_post, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_aio_call", true, "whether to trace when an aio task is called")) spec->on_aio_call.put_back(tracer_on_aio_call, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_aio_enqueue", true, "whether to trace when an aio task is enqueued")) spec->on_aio_enqueue.put_back(tracer_on_aio_enqueue, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_rpc_call", true, "whether to trace when a rpc is made")) spec->on_rpc_call.put_back(tracer_on_rpc_call, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_rpc_request_enqueue", true, "whether to trace when a rpc request task is enqueued")) spec->on_rpc_request_enqueue.put_back(tracer_on_rpc_request_enqueue, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_rpc_reply", true, "whether to trace when reply a rpc request")) spec->on_rpc_reply.put_back(tracer_on_rpc_reply, "tracer"); if (dsn_config_get_value_bool(section_name.c_str(), "tracer::on_rpc_response_enqueue", true, "whetehr to trace when a rpc response task is enqueued")) spec->on_rpc_response_enqueue.put_back(tracer_on_rpc_response_enqueue, "tracer"); } register_command({ "tracer.find" }, "tracer.find - find related logs", "tracer.find forward|f|backward|b rpc|r|task|t trace_id|task_id(e.g., a023003920302390) log_file_name(log.xx.txt)", tracer_log_flow ); }
void meta_service::on_request(dsn_message_t msg) { meta_request_header hdr; ::unmarshall(msg, hdr); meta_response_header rhdr; bool is_primary = _state->get_meta_server_primary(rhdr.primary_address); if (is_primary) is_primary = (primary_address() == rhdr.primary_address); rhdr.err = ERR_OK; ::dsn::rpc_address faddr; dsn_msg_from_address(msg, faddr.c_addr_ptr()); dinfo("recv meta request %s from %s:%hu", dsn_task_code_to_string(hdr.rpc_tag), faddr.name(), faddr.port() ); dsn_message_t resp = dsn_msg_create_response(msg); if (!is_primary) { rhdr.err = ERR_TALK_TO_OTHERS; ::marshall(resp, rhdr); } else if (!_started) { rhdr.err = ERR_SERVICE_NOT_ACTIVE; ::marshall(resp, rhdr); } else if (hdr.rpc_tag == RPC_CM_QUERY_NODE_PARTITIONS) { configuration_query_by_node_request request; configuration_query_by_node_response response; ::unmarshall(msg, request); query_configuration_by_node(request, response); ::marshall(resp, rhdr); ::marshall(resp, response); } else if (hdr.rpc_tag == RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX) { configuration_query_by_index_request request; configuration_query_by_index_response response; unmarshall(msg, request); query_configuration_by_index(request, response); ::marshall(resp, rhdr); ::marshall(resp, response); } else if (hdr.rpc_tag == RPC_CM_UPDATE_PARTITION_CONFIGURATION) { update_configuration(msg, resp); rhdr.err.end_tracking(); return; } else { dassert(false, "unknown rpc tag %x (%s)", hdr.rpc_tag, dsn_task_code_to_string(hdr.rpc_tag)); } dsn_rpc_reply(resp); }