error_code replication_app_base::write_internal(mutation_ptr& mu) { dassert (mu->data.header.decree == last_committed_decree() + 1, ""); dassert(mu->client_requests.size() == mu->data.updates.size() && mu->client_requests.size() > 0, "data inconsistency in mutation"); int count = static_cast<int>(mu->client_requests.size()); _batch_state = (count == 1 ? BS_NOT_BATCH : BS_BATCH); for (int i = 0; i < count; i++) { if (_batch_state == BS_BATCH && i + 1 == count) { _batch_state = BS_BATCH_LAST; } auto& r = mu->client_requests[i]; if (r.code != RPC_REPLICATION_WRITE_EMPTY) { dinfo("%s: mutation %s dispatch rpc call: %s", _replica->name(), mu->name(), dsn_task_code_to_string(r.code)); binary_reader reader(mu->data.updates[i]); dsn_message_t resp = (r.req ? dsn_msg_create_response(r.req) : nullptr); uint64_t now = dsn_now_ns(); dispatch_rpc_call(r.code, reader, resp); now = dsn_now_ns() - now; _app_commit_latency.set(now); } else { // empty mutation write } if (_physical_error != 0) { derror("%s: physical error %d occurs in replication local app %s", _replica->name(), _physical_error, data_dir().c_str()); return ERR_LOCAL_APP_FAILURE; } } ++_last_committed_decree; _replica->update_commit_statistics(count); _app_commit_throughput.add((uint64_t)count); _app_commit_decree.increment(); return ERR_OK; }
void rpc_request_task::enqueue() { if (spec().rpc_request_dropped_before_execution_when_timeout) { _enqueue_ts_ns = dsn_now_ns(); } task::enqueue(node()->computation()->get_pool(spec().pool_code)); }
static void hpc_tail_logs_dumpper() { uint64_t nts = dsn_now_ns(); std::stringstream log; log << ::dsn::tools::spec().coredump_dir << "/hpc_tail_logs." << nts << ".log"; std::ofstream olog(log.str().c_str()); std::vector<int> threads; tail_log_manager::instance().get_all_keys(threads); for (auto& tid : threads) { __tail_log_info__* log; if (!tail_log_manager::instance().get(tid, log)) continue; tail_log_hdr *hdr = log->last_hdr, *tmp = log->last_hdr; while (tmp != nullptr && tmp != hdr) { if (!tmp->is_valid()) break; char* llog = (char*)(tmp)-tmp->length; olog << llog << std::endl; // try previous log tmp = tmp->prev; }; } olog.close(); }
static void profiler_on_task_end(task* this_) { uint64_t qts = task_ext_for_profiler::get(this_); uint64_t now = dsn_now_ns(); s_spec_profilers[this_->spec().code].ptr[TASK_EXEC_TIME_NS]->set(now - qts); s_spec_profilers[this_->spec().code].ptr[TASK_THROUGHPUT]->increment(); }
static void profiler_on_aio_enqueue(aio_task* this_) { uint64_t& ats = task_ext_for_profiler::get(this_); uint64_t now = dsn_now_ns(); s_spec_profilers[this_->spec().code].ptr[AIO_LATENCY_NS]->set(now - ats); ats = now; }
static void profiler_on_task_begin(task* this_) { uint64_t& qts = task_ext_for_profiler::get(this_); uint64_t now = dsn_now_ns(); s_spec_profilers[this_->spec().code].ptr[TASK_QUEUEING_TIME_NS]->set(now - qts); qts = now; }
static void print_header(FILE* fp, dsn_log_level_t log_level) { static char s_level_char[] = "IDWEF"; uint64_t ts = 0; if (::dsn::tools::is_engine_ready()) ts = dsn_now_ns(); char str[24]; ::dsn::utils::time_ms_to_string(ts/1000000, str); int tid = ::dsn::utils::get_current_tid(); fprintf(fp, "%c%s (%" PRIu64 " %04x) ", s_level_char[log_level], str, ts, tid); auto t = task::get_current_task_id(); if (t) { if (nullptr != task::get_current_worker2()) { fprintf(fp, "%6s.%7s%d.%016" PRIx64 ": ", task::get_current_node_name(), task::get_current_worker2()->pool_spec().name.c_str(), task::get_current_worker2()->index(), t ); } else { fprintf(fp, "%6s.%7s.%05d.%016" PRIx64 ": ", task::get_current_node_name(), "io-thrd", tid, t ); } } else { if (nullptr != task::get_current_worker2()) { fprintf(fp, "%6s.%7s%u: ", task::get_current_node_name(), task::get_current_worker2()->pool_spec().name.c_str(), task::get_current_worker2()->index() ); } else { fprintf(fp, "%6s.%7s.%05d: ", task::get_current_node_name(), "io-thrd", tid ); } } }
// return true means continue, otherwise early terminate with task::set_error_code static void profiler_on_aio_call(task* caller, aio_task* callee) { auto& prof = s_spec_profilers[caller->spec().code]; if (prof.collect_call_count) { prof.call_counts[callee->spec().code]++; } // time disk io starts task_ext_for_profiler::get(callee) = dsn_now_ns(); }
mutation::mutation() { next = nullptr; _private0 = 0; _not_logged = 1; _prepare_ts_ms = 0; _prepare_request = nullptr; strcpy(_name, "0.0.0.0"); _appro_data_bytes = sizeof(mutation_header); _create_ts_ns = dsn_now_ns(); _tid = ++s_tid; }
// return true means continue, otherwise early terminate with task::set_error_code static void profiler_on_rpc_reply(task* caller, message_ex* msg) { auto& prof = s_spec_profilers[caller->spec().code]; if (prof.collect_call_count) { prof.call_counts[msg->local_rpc_code]++; } uint64_t qts = message_ext_for_profiler::get(msg); uint64_t now = dsn_now_ns(); auto code = task_spec::get(msg->local_rpc_code)->rpc_paired_code; s_spec_profilers[code].ptr[RPC_SERVER_LATENCY_NS]->set(now - qts); }
// call normal task static void profiler_on_task_enqueue(task* caller, task* callee) { if (caller != nullptr) { auto& prof = s_spec_profilers[caller->spec().code]; auto code = caller->spec().code; if (prof.collect_call_count) { prof.call_counts[callee->spec().code]++; } } task_ext_for_profiler::get(callee) = dsn_now_ns(); }
static void profiler_on_rpc_response_enqueue(rpc_response_task* resp) { uint64_t& cts = task_ext_for_profiler::get(resp); uint64_t now = dsn_now_ns(); if (resp->get_response() != nullptr) { s_spec_profilers[resp->spec().code].ptr[RPC_CLIENT_NON_TIMEOUT_LATENCY_NS]->set(now - cts); } else { s_spec_profilers[resp->spec().code].ptr[RPC_CLIENT_TIMEOUT_THROUGHPUT]->increment(); } cts = now; }
// return true means continue, otherwise early terminate with task::set_error_code static void profiler_on_rpc_call(task* caller, message_ex* req, rpc_response_task* callee) { auto& prof = s_spec_profilers[caller->spec().code]; if (prof.collect_call_count) { prof.call_counts[req->local_rpc_code]++; } // time rpc starts if (nullptr != callee) { task_ext_for_profiler::get(callee) = dsn_now_ns(); } }
std::string hpc_tail_logger::search( const char* keyword, int back_seconds, int back_start_seconds, std::unordered_set<int>& target_threads) { uint64_t nts = dsn_now_ns(); uint64_t start = nts - static_cast<uint64_t>(back_seconds)* 1000 * 1000 * 1000; // second to nanosecond uint64_t end = nts - static_cast<uint64_t>(back_start_seconds)* 1000 * 1000 * 1000; // second to nanosecond std::vector<int> threads; tail_log_manager::instance().get_all_keys(threads); std::stringstream ss; int log_count = 0; for (auto& tid : threads) { __tail_log_info__* log; if (!tail_log_manager::instance().get(tid, log)) continue; // filter by tid if (target_threads.size() > 0 && target_threads.find(tid) == target_threads.end()) continue; tail_log_hdr *hdr = log->last_hdr, *tmp = log->last_hdr; do { if (!tmp->is_valid()) break; // filter by time if (tmp->ts < start) break; if (tmp->ts > end) { tmp = tmp->prev; continue; } // filter by keyword char* llog = (char*)(tmp) - tmp->length; if (strstr(llog, keyword)) { ss << llog << std::endl; log_count++; } // try previous log tmp = tmp->prev; } while (tmp != nullptr && tmp != hdr); } char strb[24], stre[24]; ::dsn::utils::time_ms_to_string(start / 1000000, strb); ::dsn::utils::time_ms_to_string(end / 1000000, stre); ss << "------------------------------------------" << std::endl; ss << "In total (" << log_count << ") log entries are found between [" << strb << ", "<< stre << "] " << std::endl; return std::move(ss.str()); }
static void profiler_on_rpc_request_enqueue(rpc_request_task* callee) { uint64_t now = dsn_now_ns(); task_ext_for_profiler::get(callee) = now; message_ext_for_profiler::get(callee->get_request()) = now; }
void hpc_tail_logger::dsn_logv(const char *file, const char *function, const int line, dsn_log_level_t log_level, const char* title, const char *fmt, va_list args ) { // init log buffer if necessary if (s_tail_log_info.magic != 0xdeadbeef) { s_tail_log_info.buffer = (char*)malloc(_per_thread_buffer_bytes); s_tail_log_info.next_write_ptr = s_tail_log_info.buffer; s_tail_log_info.last_hdr = nullptr; memset(s_tail_log_info.buffer, '\0', _per_thread_buffer_bytes); tail_log_manager::instance().put(::dsn::utils::get_current_tid(), &s_tail_log_info); s_tail_log_info.magic = 0xdeadbeef; } // get enough write space >= 1K if (s_tail_log_info.next_write_ptr + 1024 > s_tail_log_info.buffer + _per_thread_buffer_bytes) { s_tail_log_info.next_write_ptr = s_tail_log_info.buffer; } char* ptr = s_tail_log_info.next_write_ptr; char* ptr0 = ptr; // remember it size_t capacity = static_cast<size_t>(s_tail_log_info.buffer + _per_thread_buffer_bytes - ptr); // print verbose log header uint64_t ts = 0; int tid = ::dsn::utils::get_current_tid(); if (::dsn::tools::is_engine_ready()) ts = dsn_now_ns(); char str[24]; ::dsn::utils::time_ms_to_string(ts / 1000000, str); auto wn = sprintf(ptr, "%s (%llu %04x) ", str, static_cast<long long unsigned int>(ts), tid); ptr += wn; capacity -= wn; task* t = task::get_current_task(); if (t) { if (nullptr != task::get_current_worker()) { wn = sprintf(ptr, "%6s.%7s%u.%016llx: ", task::get_current_node_name(), task::get_current_worker()->pool_spec().name.c_str(), task::get_current_worker()->index(), static_cast<long long unsigned int>(t->id()) ); } else { wn = sprintf(ptr, "%6s.%7s.%05d.%016llx: ", task::get_current_node_name(), "io-thrd", tid, static_cast<long long unsigned int>(t->id()) ); } } else { wn = sprintf(ptr, "%6s.%7s.%05d: ", task::get_current_node_name(), "io-thrd", tid ); } ptr += wn; capacity -= wn; // print body wn = std::vsnprintf(ptr, capacity, fmt, args); ptr += wn; capacity -= wn; // set binary entry header on tail tail_log_hdr* hdr = (tail_log_hdr*)ptr; hdr->log_break = 0; hdr->length = 0; hdr->magic = 0xdeadbeef; hdr->ts = ts; hdr->length = static_cast<int>(ptr - ptr0); hdr->prev = s_tail_log_info.last_hdr; s_tail_log_info.last_hdr = hdr; ptr += sizeof(tail_log_hdr); capacity -= sizeof(tail_log_hdr); // set next write ptr s_tail_log_info.next_write_ptr = ptr; // dump critical logs on screen if (log_level >= LOG_LEVEL_WARNING) { std::cout << ptr0 << std::endl; } }
void hpc_logger::dsn_logv(const char *file, const char *function, const int line, dsn_log_level_t log_level, const char* title, const char *fmt, va_list args ) { if (s_hpc_log_tls_info.magic != 0xdeadbeef) { s_hpc_log_tls_info.buffer = (char*)malloc(_per_thread_buffer_bytes); s_hpc_log_tls_info.next_write_ptr = s_hpc_log_tls_info.buffer; hpc_log_manager::instance().put(::dsn::utils::get_current_tid(), &s_hpc_log_tls_info); s_hpc_log_tls_info.magic = 0xdeadbeef; } // get enough write space >= 1K if (s_hpc_log_tls_info.next_write_ptr + 1024 > s_hpc_log_tls_info.buffer + _per_thread_buffer_bytes) { _write_list_lock.lock(); buffer_push(s_hpc_log_tls_info.buffer, static_cast<int>(s_hpc_log_tls_info.next_write_ptr - s_hpc_log_tls_info.buffer)); _write_list_lock.unlock(); _write_list_cond.notify_one(); s_hpc_log_tls_info.buffer = (char*)malloc(_per_thread_buffer_bytes); s_hpc_log_tls_info.next_write_ptr = s_hpc_log_tls_info.buffer; } char* ptr = s_hpc_log_tls_info.next_write_ptr; char* ptr0 = ptr; // remember it size_t capacity = static_cast<size_t>(s_hpc_log_tls_info.buffer + _per_thread_buffer_bytes - ptr); // print verbose log header uint64_t ts = 0; int tid = ::dsn::utils::get_current_tid(); if (::dsn::tools::is_engine_ready()) ts = dsn_now_ns(); char str[24]; ::dsn::utils::time_ms_to_string(ts / 1000000, str); auto wn = sprintf(ptr, "%s (%llu %04x) ", str, static_cast<long long unsigned int>(ts), tid); ptr += wn; capacity -= wn; task* t = task::get_current_task(); if (t) { if (nullptr != task::get_current_worker()) { wn = sprintf(ptr, "%6s.%7s%u.%016llx: ", task::get_current_node_name(), task::get_current_worker()->pool_spec().name.c_str(), task::get_current_worker()->index(), static_cast<long long unsigned int>(t->id()) ); } else { wn = sprintf(ptr, "%6s.%7s.%05d.%016llx: ", task::get_current_node_name(), "io-thrd", tid, static_cast<long long unsigned int>(t->id()) ); } } else { wn = sprintf(ptr, "%6s.%7s.%05d: ", task::get_current_node_name(), "io-thrd", tid ); } ptr += wn; capacity -= wn; // print body wn = std::vsnprintf(ptr, capacity - 1, fmt, args); *(ptr + wn) = '\n'; ptr += (wn + 1); capacity -= (wn + 1); // set next write ptr s_hpc_log_tls_info.next_write_ptr = ptr; // dump critical logs on screen if (log_level >= LOG_LEVEL_WARNING) { std::cout.write(ptr0, ptr - ptr0); } }
void replica::execute_mutation(mutation_ptr& mu) { dinfo("%s: execute mutation %s: request_count = %u", name(), mu->name(), static_cast<int>(mu->client_requests.size()) ); error_code err = ERR_OK; decree d = mu->data.header.decree; switch (status()) { case partition_status::PS_INACTIVE: if (_app->last_committed_decree() + 1 == d) { err = _app->write_internal(mu); } else { ddebug( "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64, name(), mu->name(), enum_to_string(status()), _app->last_committed_decree() ); } break; case partition_status::PS_PRIMARY: { check_state_completeness(); dassert(_app->last_committed_decree() + 1 == d, ""); err = _app->write_internal(mu); } break; case partition_status::PS_SECONDARY: if (!_secondary_states.checkpoint_is_running) { check_state_completeness(); dassert (_app->last_committed_decree() + 1 == d, ""); err = _app->write_internal(mu); } else { ddebug( "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64, name(), mu->name(), enum_to_string(status()), _app->last_committed_decree() ); // make sure private log saves the state // catch-up will be done later after checkpoint task is fininished dassert(_private_log != nullptr, ""); } break; case partition_status::PS_POTENTIAL_SECONDARY: if (_potential_secondary_states.learning_status == learner_status::LearningSucceeded || _potential_secondary_states.learning_status == learner_status::LearningWithPrepareTransient) { dassert(_app->last_committed_decree() + 1 == d, ""); err = _app->write_internal(mu); } else { // prepare also happens with learner_status::LearningWithPrepare, in this case // make sure private log saves the state, // catch-up will be done later after the checkpoint task is finished ddebug( "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64, name(), mu->name(), enum_to_string(status()), _app->last_committed_decree() ); } break; case partition_status::PS_ERROR: break; } ddebug("TwoPhaseCommit, %s: mutation %s committed, err = %s", name(), mu->name(), err.to_string()); _counter_commit_latency.set(dsn_now_ns() - mu->create_ts_ns()); if (err != ERR_OK) { handle_local_failure(err); } if (status() == partition_status::PS_PRIMARY) { mutation_ptr next = _primary_states.write_queue.check_possible_work( static_cast<int>(_prepare_list->max_decree() - d) ); if (next) { init_prepare(next); } } }