error_code replication_app_base::write_internal(mutation_ptr& mu)
{
    dassert (mu->data.header.decree == last_committed_decree() + 1, "");
    dassert(mu->client_requests.size() == mu->data.updates.size()
        && mu->client_requests.size() > 0, 
        "data inconsistency in mutation");

    int count = static_cast<int>(mu->client_requests.size());
    _batch_state = (count == 1 ? BS_NOT_BATCH : BS_BATCH);
    for (int i = 0; i < count; i++)
    {
        if (_batch_state == BS_BATCH && i + 1 == count)
        {
            _batch_state = BS_BATCH_LAST;
        }

        auto& r = mu->client_requests[i];
        if (r.code != RPC_REPLICATION_WRITE_EMPTY)
        {
            dinfo("%s: mutation %s dispatch rpc call: %s",
                  _replica->name(), mu->name(), dsn_task_code_to_string(r.code));
            binary_reader reader(mu->data.updates[i]);
            dsn_message_t resp = (r.req ? dsn_msg_create_response(r.req) : nullptr);

            uint64_t now = dsn_now_ns();
            dispatch_rpc_call(r.code, reader, resp);
            now = dsn_now_ns() - now;

            _app_commit_latency.set(now);
        }
        else
        {
            // empty mutation write
        }

        if (_physical_error != 0)
        {
            derror("%s: physical error %d occurs in replication local app %s",
                   _replica->name(), _physical_error, data_dir().c_str());
            return ERR_LOCAL_APP_FAILURE;
        }
    }

    ++_last_committed_decree;

    _replica->update_commit_statistics(count);
    _app_commit_throughput.add((uint64_t)count);
    _app_commit_decree.increment();

    return ERR_OK;
}
Exemple #2
0
void rpc_request_task::enqueue()
{
    if (spec().rpc_request_dropped_before_execution_when_timeout) {
        _enqueue_ts_ns = dsn_now_ns();
    }
    task::enqueue(node()->computation()->get_pool(spec().pool_code));
}
Exemple #3
0
        static void hpc_tail_logs_dumpper()
        {
            uint64_t nts = dsn_now_ns();
            std::stringstream log;
            log << ::dsn::tools::spec().coredump_dir << "/hpc_tail_logs." << nts << ".log";

            std::ofstream olog(log.str().c_str());

            std::vector<int> threads;
            tail_log_manager::instance().get_all_keys(threads);

            for (auto& tid : threads)
            {
                __tail_log_info__* log;
                if (!tail_log_manager::instance().get(tid, log))
                    continue;

                tail_log_hdr *hdr = log->last_hdr, *tmp = log->last_hdr;
                while (tmp != nullptr && tmp != hdr)
                {
                    if (!tmp->is_valid())
                        break;
                    
                    char* llog = (char*)(tmp)-tmp->length;
                    olog << llog << std::endl;

                    // try previous log
                    tmp = tmp->prev;

                };
            }

            olog.close();
        }
Exemple #4
0
 static void profiler_on_task_end(task* this_)
 {
     uint64_t qts = task_ext_for_profiler::get(this_);
     uint64_t now = dsn_now_ns();
     s_spec_profilers[this_->spec().code].ptr[TASK_EXEC_TIME_NS]->set(now - qts);
     s_spec_profilers[this_->spec().code].ptr[TASK_THROUGHPUT]->increment();
 }
Exemple #5
0
        static void profiler_on_aio_enqueue(aio_task* this_)
        {
            uint64_t& ats = task_ext_for_profiler::get(this_);
            uint64_t now = dsn_now_ns();

            s_spec_profilers[this_->spec().code].ptr[AIO_LATENCY_NS]->set(now - ats);
            ats = now;
        }
Exemple #6
0
        static void profiler_on_task_begin(task* this_)
        {
            uint64_t& qts = task_ext_for_profiler::get(this_);
            uint64_t now = dsn_now_ns();
            s_spec_profilers[this_->spec().code].ptr[TASK_QUEUEING_TIME_NS]->set(now - qts);
            qts = now;

        }
Exemple #7
0
        static void print_header(FILE* fp, dsn_log_level_t log_level)
        {
            static char s_level_char[] = "IDWEF";

            uint64_t ts = 0;
            if (::dsn::tools::is_engine_ready())
                ts = dsn_now_ns();

            char str[24];
            ::dsn::utils::time_ms_to_string(ts/1000000, str);

            int tid = ::dsn::utils::get_current_tid(); 

            fprintf(fp, "%c%s (%" PRIu64 " %04x) ", s_level_char[log_level],
                    str, ts, tid);

            auto t = task::get_current_task_id();
            if (t)
            {
                if (nullptr != task::get_current_worker2())
                {
                    fprintf(fp, "%6s.%7s%d.%016" PRIx64 ": ",
                        task::get_current_node_name(),
                        task::get_current_worker2()->pool_spec().name.c_str(),
                        task::get_current_worker2()->index(),
                        t
                        );
                }
                else
                {
                    fprintf(fp, "%6s.%7s.%05d.%016" PRIx64 ": ",
                        task::get_current_node_name(),
                        "io-thrd",
                        tid,
                        t
                        );
                }
            }
            else
            {
                if (nullptr != task::get_current_worker2())
                {
                    fprintf(fp, "%6s.%7s%u: ",
                        task::get_current_node_name(),
                        task::get_current_worker2()->pool_spec().name.c_str(),
                        task::get_current_worker2()->index()
                        );
                }
                else
                {
                    fprintf(fp, "%6s.%7s.%05d: ",
                        task::get_current_node_name(),
                        "io-thrd",
                        tid
                        );
                }
            }
        }
Exemple #8
0
        // return true means continue, otherwise early terminate with task::set_error_code
        static void profiler_on_aio_call(task* caller, aio_task* callee)
        {
            auto& prof = s_spec_profilers[caller->spec().code];
            if (prof.collect_call_count)
            {
                prof.call_counts[callee->spec().code]++;
            }

            // time disk io starts
            task_ext_for_profiler::get(callee) = dsn_now_ns();
        }
Exemple #9
0
mutation::mutation()
{
    next = nullptr;
    _private0 = 0; 
    _not_logged = 1;
    _prepare_ts_ms = 0;
    _prepare_request = nullptr;
    strcpy(_name, "0.0.0.0");
    _appro_data_bytes = sizeof(mutation_header);
    _create_ts_ns = dsn_now_ns();
    _tid = ++s_tid;
}
Exemple #10
0
        // return true means continue, otherwise early terminate with task::set_error_code
        static void profiler_on_rpc_reply(task* caller, message_ex* msg)
        {
            auto& prof = s_spec_profilers[caller->spec().code];
            if (prof.collect_call_count)
            {
                prof.call_counts[msg->local_rpc_code]++;
            }

            uint64_t qts = message_ext_for_profiler::get(msg);
            uint64_t now = dsn_now_ns();
            auto code = task_spec::get(msg->local_rpc_code)->rpc_paired_code;
            s_spec_profilers[code].ptr[RPC_SERVER_LATENCY_NS]->set(now - qts);
        }
Exemple #11
0
        // call normal task
        static void profiler_on_task_enqueue(task* caller, task* callee)
        {
            if (caller != nullptr)
            {
                auto& prof = s_spec_profilers[caller->spec().code];
                auto code = caller->spec().code;
                if (prof.collect_call_count)
                {
                    prof.call_counts[callee->spec().code]++;
                }
            }

            task_ext_for_profiler::get(callee) = dsn_now_ns();
        }
Exemple #12
0
 static void profiler_on_rpc_response_enqueue(rpc_response_task* resp)
 {
     uint64_t& cts = task_ext_for_profiler::get(resp);
     uint64_t now = dsn_now_ns();
     if (resp->get_response() != nullptr)
     {
         s_spec_profilers[resp->spec().code].ptr[RPC_CLIENT_NON_TIMEOUT_LATENCY_NS]->set(now - cts);
     }
     else
     {
         s_spec_profilers[resp->spec().code].ptr[RPC_CLIENT_TIMEOUT_THROUGHPUT]->increment();
     }
     cts = now;
 }
Exemple #13
0
        // return true means continue, otherwise early terminate with task::set_error_code
        static void profiler_on_rpc_call(task* caller, message_ex* req, rpc_response_task* callee)
        {
            auto& prof = s_spec_profilers[caller->spec().code];
            if (prof.collect_call_count)
            {
                prof.call_counts[req->local_rpc_code]++;
            }

            // time rpc starts
            if (nullptr != callee)
            {
                task_ext_for_profiler::get(callee) = dsn_now_ns();
            }

        }
Exemple #14
0
        std::string hpc_tail_logger::search(
            const char* keyword, int back_seconds, int back_start_seconds, 
            std::unordered_set<int>& target_threads)
        {
            uint64_t nts = dsn_now_ns();
            uint64_t start = nts - static_cast<uint64_t>(back_seconds)* 1000 * 1000 * 1000; // second to nanosecond
            uint64_t end = nts - static_cast<uint64_t>(back_start_seconds)* 1000 * 1000 * 1000; // second to nanosecond

            std::vector<int> threads;
            tail_log_manager::instance().get_all_keys(threads);

            std::stringstream ss;
            int log_count = 0;

            for (auto& tid : threads)
            {
                __tail_log_info__* log;
                if (!tail_log_manager::instance().get(tid, log))
                    continue;

                // filter by tid
                if (target_threads.size() > 0 && target_threads.find(tid) == target_threads.end())
                    continue;

                tail_log_hdr *hdr = log->last_hdr, *tmp = log->last_hdr;
                do 
                {
                    if (!tmp->is_valid())
                        break;

                    // filter by time
                    if (tmp->ts < start)
                        break;
                    
                    if (tmp->ts > end)
                    {
                        tmp = tmp->prev;
                        continue;
                    }
                        
                    // filter by keyword
                    char* llog = (char*)(tmp) - tmp->length;
                    if (strstr(llog, keyword))
                    {
                        ss << llog << std::endl;
                        log_count++;
                    }

                    // try previous log
                    tmp = tmp->prev;
                    
                } while (tmp != nullptr && tmp != hdr);
            }

            char strb[24], stre[24];
            ::dsn::utils::time_ms_to_string(start / 1000000, strb);
            ::dsn::utils::time_ms_to_string(end / 1000000, stre);

            ss << "------------------------------------------" << std::endl;
            ss << "In total (" << log_count << ") log entries are found between [" << strb << ", "<< stre << "] " << std::endl;

            return std::move(ss.str());
        }
Exemple #15
0
 static void profiler_on_rpc_request_enqueue(rpc_request_task* callee)
 {
     uint64_t now = dsn_now_ns();
     task_ext_for_profiler::get(callee) = now;
     message_ext_for_profiler::get(callee->get_request()) = now;
 }
Exemple #16
0
        void hpc_tail_logger::dsn_logv(const char *file,
            const char *function,
            const int line,
            dsn_log_level_t log_level,
            const char* title,
            const char *fmt,
            va_list args
            )
        {
            // init log buffer if necessary
            if (s_tail_log_info.magic != 0xdeadbeef)
            {
                s_tail_log_info.buffer = (char*)malloc(_per_thread_buffer_bytes);
                s_tail_log_info.next_write_ptr = s_tail_log_info.buffer;
                s_tail_log_info.last_hdr = nullptr;
                memset(s_tail_log_info.buffer, '\0', _per_thread_buffer_bytes);

                tail_log_manager::instance().put(::dsn::utils::get_current_tid(), &s_tail_log_info);
                s_tail_log_info.magic = 0xdeadbeef;
            }

            // get enough write space >= 1K
            if (s_tail_log_info.next_write_ptr + 1024 > s_tail_log_info.buffer + _per_thread_buffer_bytes)
            {
                s_tail_log_info.next_write_ptr = s_tail_log_info.buffer;
            }
            char* ptr = s_tail_log_info.next_write_ptr;
            char* ptr0 = ptr; // remember it
            size_t capacity = static_cast<size_t>(s_tail_log_info.buffer + _per_thread_buffer_bytes - ptr);

            // print verbose log header    
            uint64_t ts = 0;
            int tid = ::dsn::utils::get_current_tid();
            if (::dsn::tools::is_engine_ready())
                ts = dsn_now_ns();
            char str[24];
            ::dsn::utils::time_ms_to_string(ts / 1000000, str);            
            auto wn = sprintf(ptr, "%s (%llu %04x) ", str, static_cast<long long unsigned int>(ts), tid);
            ptr += wn;
            capacity -= wn;

            task* t = task::get_current_task();
            if (t)
            {
                if (nullptr != task::get_current_worker())
                {
                    wn = sprintf(ptr, "%6s.%7s%u.%016llx: ",
                        task::get_current_node_name(),
                        task::get_current_worker()->pool_spec().name.c_str(),
                        task::get_current_worker()->index(),
                        static_cast<long long unsigned int>(t->id())
                        );
                }
                else
                {
                    wn = sprintf(ptr, "%6s.%7s.%05d.%016llx: ",
                        task::get_current_node_name(),
                        "io-thrd",
                        tid,
                        static_cast<long long unsigned int>(t->id())
                        );
                }
            }
            else
            {
                wn = sprintf(ptr, "%6s.%7s.%05d: ",
                    task::get_current_node_name(),
                    "io-thrd",
                    tid
                    );
            }

            ptr += wn;
            capacity -= wn;

            // print body
            wn = std::vsnprintf(ptr, capacity, fmt, args);
            ptr += wn;
            capacity -= wn;
            
            // set binary entry header on tail
            tail_log_hdr* hdr = (tail_log_hdr*)ptr;
            hdr->log_break = 0;
            hdr->length = 0;
            hdr->magic = 0xdeadbeef;
            hdr->ts = ts;
            hdr->length = static_cast<int>(ptr - ptr0);
            hdr->prev = s_tail_log_info.last_hdr;
            s_tail_log_info.last_hdr = hdr;

            ptr += sizeof(tail_log_hdr);
            capacity -= sizeof(tail_log_hdr);

            // set next write ptr
            s_tail_log_info.next_write_ptr = ptr;

            // dump critical logs on screen
            if (log_level >= LOG_LEVEL_WARNING)
            {
                std::cout << ptr0 << std::endl;
            }
        }
Exemple #17
0
		void hpc_logger::dsn_logv(const char *file,
			const char *function,
			const int line,
			dsn_log_level_t log_level,
			const char* title,
			const char *fmt,
			va_list args
			)
		{
			if (s_hpc_log_tls_info.magic != 0xdeadbeef)
			{
				s_hpc_log_tls_info.buffer = (char*)malloc(_per_thread_buffer_bytes);
				s_hpc_log_tls_info.next_write_ptr = s_hpc_log_tls_info.buffer;

				hpc_log_manager::instance().put(::dsn::utils::get_current_tid(), &s_hpc_log_tls_info);
				s_hpc_log_tls_info.magic = 0xdeadbeef;
			}

			// get enough write space >= 1K
			if (s_hpc_log_tls_info.next_write_ptr + 1024 > s_hpc_log_tls_info.buffer + _per_thread_buffer_bytes)
			{
				_write_list_lock.lock();
				buffer_push(s_hpc_log_tls_info.buffer, static_cast<int>(s_hpc_log_tls_info.next_write_ptr - s_hpc_log_tls_info.buffer));
				_write_list_lock.unlock();

                _write_list_cond.notify_one();

				s_hpc_log_tls_info.buffer = (char*)malloc(_per_thread_buffer_bytes);
				s_hpc_log_tls_info.next_write_ptr = s_hpc_log_tls_info.buffer;
			}

			char* ptr = s_hpc_log_tls_info.next_write_ptr;
			char* ptr0 = ptr; // remember it
			size_t capacity = static_cast<size_t>(s_hpc_log_tls_info.buffer + _per_thread_buffer_bytes - ptr);

			// print verbose log header    
			uint64_t ts = 0;
			int tid = ::dsn::utils::get_current_tid();
			if (::dsn::tools::is_engine_ready())
				ts = dsn_now_ns();
			char str[24];
			::dsn::utils::time_ms_to_string(ts / 1000000, str);
			auto wn = sprintf(ptr, "%s (%llu %04x) ", str, static_cast<long long unsigned int>(ts), tid);
			ptr += wn;
			capacity -= wn;

			task* t = task::get_current_task();
			if (t)
			{
				if (nullptr != task::get_current_worker())
				{
					wn = sprintf(ptr, "%6s.%7s%u.%016llx: ",
						task::get_current_node_name(),
						task::get_current_worker()->pool_spec().name.c_str(),
						task::get_current_worker()->index(),
						static_cast<long long unsigned int>(t->id())
						);
				}
				else
				{
					wn = sprintf(ptr, "%6s.%7s.%05d.%016llx: ",
						task::get_current_node_name(),
						"io-thrd",
						tid,
						static_cast<long long unsigned int>(t->id())
						);
				}
			}
			else
			{
				wn = sprintf(ptr, "%6s.%7s.%05d: ",
					task::get_current_node_name(),
					"io-thrd",
					tid
					);
			}

			ptr += wn;
			capacity -= wn;

			// print body
			wn = std::vsnprintf(ptr, capacity - 1, fmt, args);
            *(ptr + wn) = '\n';
			ptr += (wn + 1);
			capacity -= (wn + 1);

			// set next write ptr
			s_hpc_log_tls_info.next_write_ptr = ptr;

			// dump critical logs on screen
			if (log_level >= LOG_LEVEL_WARNING)
			{
                std::cout.write(ptr0, ptr - ptr0);
			}	
		}
Exemple #18
0
void replica::execute_mutation(mutation_ptr& mu)
{
    dinfo("%s: execute mutation %s: request_count = %u",
        name(), 
        mu->name(), 
        static_cast<int>(mu->client_requests.size())
        );

    error_code err = ERR_OK;
    decree d = mu->data.header.decree;

    switch (status())
    {
    case partition_status::PS_INACTIVE:
        if (_app->last_committed_decree() + 1 == d)
        {
            err = _app->write_internal(mu);
        }
        else
        {
            ddebug(
                "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64,
                name(), mu->name(),
                enum_to_string(status()),
                _app->last_committed_decree()
                );
        }
        break;
    case partition_status::PS_PRIMARY:
        {
            check_state_completeness();
            dassert(_app->last_committed_decree() + 1 == d, "");
            err = _app->write_internal(mu);
        }
        break;

    case partition_status::PS_SECONDARY:
        if (!_secondary_states.checkpoint_is_running)
        {
            check_state_completeness();
            dassert (_app->last_committed_decree() + 1 == d, "");
            err = _app->write_internal(mu);
        }
        else
        {
            ddebug(
                "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64,
                name(), mu->name(),
                enum_to_string(status()),
                _app->last_committed_decree()
                );

            // make sure private log saves the state
            // catch-up will be done later after checkpoint task is fininished
            dassert(_private_log != nullptr, "");          
        }
        break;
    case partition_status::PS_POTENTIAL_SECONDARY:
        if (_potential_secondary_states.learning_status == learner_status::LearningSucceeded ||
            _potential_secondary_states.learning_status == learner_status::LearningWithPrepareTransient)
        {
            dassert(_app->last_committed_decree() + 1 == d, "");
            err = _app->write_internal(mu);
        }
        else
        {
            // prepare also happens with learner_status::LearningWithPrepare, in this case
            // make sure private log saves the state,
            // catch-up will be done later after the checkpoint task is finished

            ddebug(
                "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64,
                name(), mu->name(),
                enum_to_string(status()),
                _app->last_committed_decree()
                );
        }
        break;
    case partition_status::PS_ERROR:
        break;
    }
    
    ddebug("TwoPhaseCommit, %s: mutation %s committed, err = %s", name(), mu->name(), err.to_string());

    _counter_commit_latency.set(dsn_now_ns() - mu->create_ts_ns());

    if (err != ERR_OK)
    {
        handle_local_failure(err);
    }

    if (status() == partition_status::PS_PRIMARY)
    {
        mutation_ptr next = _primary_states.write_queue.check_possible_work(
            static_cast<int>(_prepare_list->max_decree() - d)
            );

        if (next)
        {
            init_prepare(next);
        }
    }
}