inline bool rpc_session::unlink_message_for_send() { auto n = _messages.next(); int bcount = 0; dbg_dassert(0 == _sending_buffers.size(), ""); dbg_dassert(0 == _sending_msgs.size(), ""); while (n != &_messages) { auto lmsg = CONTAINING_RECORD(n, message_ex, dl); auto lcount = _parser->get_send_buffers_count(lmsg); if (bcount > 0 && bcount + lcount > _max_buffer_block_count_per_send) { break; } _sending_buffers.resize(bcount + lcount); _parser->prepare_buffers_on_send(lmsg, 0, &_sending_buffers[bcount]); bcount += lcount; _sending_msgs.push_back(lmsg); n = n->next(); lmsg->dl.remove(); } // added in send_message _message_count.fetch_sub((int)_sending_msgs.size(), std::memory_order_relaxed); return _sending_msgs.size() > 0; }
inline bool rpc_session::unlink_message_for_send() { auto n = _messages.next(); int bcount = 0; dbg_dassert(0 == _sending_buffers.size(), ""); dbg_dassert(0 == _sending_msgs.size(), ""); while (n != &_messages) { auto lmsg = CONTAINING_RECORD(n, message_ex, dl); auto lcount = _parser->get_buffer_count_on_send(lmsg); if (bcount > 0 && bcount + lcount > _max_buffer_block_count_per_send) { break; } _sending_buffers.resize(bcount + lcount); auto rcount = _parser->get_buffers_on_send(lmsg, &_sending_buffers[bcount]); dassert(lcount >= rcount, ""); if (lcount != rcount) _sending_buffers.resize(bcount + rcount); bcount += rcount; _sending_msgs.push_back(lmsg); n = n->next(); lmsg->dl.remove(); } // added in send_message _message_count -= (int)_sending_msgs.size(); return _sending_msgs.size() > 0; }
void replication_app_client_base::call_with_address(dsn::rpc_address addr, request_context_ptr request) { auto& msg = request->request; dbg_dassert(!addr.is_invalid(), ""); dbg_dassert(_app_id > 0, ""); if (request->header_pos != 0) { if (request->is_read) { request->read_header.gpid.app_id = _app_id; request->read_header.gpid.pidx = request->partition_index; blob buffer(request->header_pos, 0, sizeof(request->read_header)); binary_writer writer(buffer); marshall(writer, request->read_header); dsn_msg_options_t opts; opts.timeout_ms = request->timeout_ms; opts.thread_hash = gpid_to_hash(request->read_header.gpid); opts.vnid = *(uint64_t*)(&request->read_header.gpid); dsn_msg_set_options(request->request, &opts, DSN_MSGM_HASH | DSN_MSGM_TIMEOUT); // TODO: not supported yet DSN_MSGM_VNID); } else { request->write_header.gpid.app_id = _app_id; request->write_header.gpid.pidx = request->partition_index; blob buffer(request->header_pos, 0, sizeof(request->write_header)); binary_writer writer(buffer); marshall(writer, request->write_header); dsn_msg_options_t opts; opts.timeout_ms = request->timeout_ms; opts.thread_hash = gpid_to_hash(request->write_header.gpid); opts.vnid = *(uint64_t*)(&request->write_header.gpid); dsn_msg_set_options(request->request, &opts, DSN_MSGM_HASH | DSN_MSGM_TIMEOUT); // TODO: not supported yet DSN_MSGM_VNID | DSN_MSGM_CONTEXT); } request->header_pos = 0; } { zauto_lock l(request->lock); rpc::call( addr, msg, this, std::bind( &replication_app_client_base::replica_rw_reply, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, request ) ); } }
rpc_request_task::rpc_request_task(message_ptr& request, service_node* node) : task(task_code(request->header().local_rpc_code), request->header().client.hash, node), _request(request) { dbg_dassert (TASK_TYPE_RPC_REQUEST == spec().type, "task type must be RPC_REQUEST, please use DEFINE_TASK_CODE_RPC to define the task code"); }
replication_app_client_base::request_context* replication_app_client_base::create_write_context( int partition_index, task_code code, rpc_response_task_ptr callback, int reply_hash ) { auto rc = new request_context; rc->callback_task = callback; rc->is_read = false; rc->partition_index = partition_index; rc->write_header.gpid.app_id = _app_id; rc->write_header.gpid.pidx = partition_index; rc->write_header.code = code; rc->timeout_timer = nullptr; if (rc->write_header.gpid.app_id == -1) { rc->header_pos = callback->get_request()->writer().write_placeholder(); dbg_dassert(rc->header_pos != 0xffff, ""); } else { rc->header_pos = 0xffff; marshall(callback->get_request()->writer(), rc->write_header); callback->get_request()->header().client.hash = gpid_to_hash(rc->write_header.gpid); } return rc; }
rpc_response_task::rpc_response_task( message_ex* request, dsn_rpc_response_handler_t cb, void* context, dsn_task_cancelled_handler_t on_cancel, int hash, service_node* node ) : task(task_spec::get(request->local_rpc_code)->rpc_paired_code, context, on_cancel, hash == 0 ? request->header->client.hash : hash, node) { _cb = cb; _is_null = (_cb == nullptr); set_error_code(ERR_IO_PENDING); dbg_dassert (TASK_TYPE_RPC_RESPONSE == spec().type, "task must be of RPC_RESPONSE type, please use DEFINE_TASK_CODE_RPC to define the request task code"); _request = request; _response = nullptr; _caller_pool = task::get_current_worker() ? task::get_current_worker()->pool() : nullptr; _request->add_ref(); // released in dctor }
replication_app_client_base::request_context* replication_app_client_base::create_read_context( int partition_index, task_code code, rpc_response_task_ptr callback, read_semantic_t read_semantic, decree snapshot_decree, // only used when ReadSnapshot int reply_hash ) { auto rc = new request_context; rc->callback_task = callback; rc->is_read = true; rc->partition_index = partition_index; rc->read_header.gpid.app_id = _app_id; rc->read_header.gpid.pidx = partition_index; rc->read_header.code = code; rc->read_header.semantic = read_semantic; rc->read_header.version_decree = snapshot_decree; rc->timeout_timer = nullptr; if (rc->read_header.gpid.app_id == -1) { rc->header_pos = callback->get_request()->writer().write_placeholder(); dbg_dassert(rc->header_pos != 0xffff, ""); } else { rc->header_pos = 0xffff; marshall(callback->get_request()->writer(), rc->read_header); callback->get_request()->header().client.hash = gpid_to_hash(rc->read_header.gpid); } return rc; }
rpc_response_task::rpc_response_task(message_ex *request, rpc_response_handler &&cb, int hash, service_node *node) : task(task_spec::get(request->local_rpc_code)->rpc_paired_code, hash == 0 ? request->header->client.thread_hash : hash, node), _cb(std::move(cb)) { _is_null = (_cb == nullptr); set_error_code(ERR_IO_PENDING); dbg_dassert(TASK_TYPE_RPC_RESPONSE == spec().type, "%s is not of RPC_RESPONSE type, please use DEFINE_TASK_CODE_RPC to define the " "request task code", spec().name.c_str()); _request = request; _response = nullptr; _caller_pool = get_current_worker() ? get_current_worker()->pool() : nullptr; _request->add_ref(); // released in dctor }
void replica::send_prepare_message(const end_point& addr, partition_status status, mutation_ptr& mu, int timeout_milliseconds) { message_ptr msg = message::create_request(RPC_PREPARE, timeout_milliseconds, gpid_to_hash(get_gpid())); marshall(msg, get_gpid()); replica_configuration rconfig; _primary_states.get_replica_config(status, rconfig); marshall(msg, rconfig); mu->write_to(msg); dbg_dassert (mu->remote_tasks().find(addr) == mu->remote_tasks().end()); mu->remote_tasks()[addr] = rpc::call(addr, msg, this, std::bind(&replica::on_prepare_reply, this, std::make_pair(mu, rconfig.status), std::placeholders::_1, std::placeholders::_2, std::placeholders::_3), gpid_to_hash(get_gpid()) ); ddebug( "%s: mutation %s send_prepare_message to %s:%d as %s", name(), mu->name(), addr.name.c_str(), static_cast<int>(addr.port), enum_to_string(rconfig.status) ); }
void task_queue::enqueue_internal(task* task) { auto& sp = task->spec(); auto throttle_mode = sp.rpc_request_throttling_mode; if (throttle_mode != TM_NONE) { int ac_value = 0; if (_spec->enable_virtual_queue_throttling) { ac_value = _virtual_queue_length; } else { ac_value = count(); } if (throttle_mode == TM_DELAY) { int delay_ms = sp.rpc_request_delayer.delay(ac_value, _spec->queue_length_throttling_threshold); if (delay_ms > 0) { auto rtask = static_cast<rpc_request_task*>(task); rtask->get_request()->io_session->delay_recv(delay_ms); dwarn("too many pending tasks (%d), delay traffic from %s for %d milliseconds", ac_value, rtask->get_request()->header->from_address.to_string(), delay_ms ); } } else { dbg_dassert(TM_REJECT == throttle_mode, "unknow mode %d", (int)throttle_mode); if (ac_value > _spec->queue_length_throttling_threshold) { auto rtask = static_cast<rpc_request_task*>(task); auto resp = rtask->get_request()->create_response(); task::get_current_rpc()->reply(resp, ERR_BUSY); dwarn("too many pending tasks (%d), reject message from %s with trace_id = %016" PRIx64, ac_value, rtask->get_request()->header->from_address.to_string(), rtask->get_request()->header->trace_id ); task->release_ref(); // added in task::enqueue(pool) return; } } } tls_dsn.last_worker_queue_size = increase_count(); enqueue(task); }
rpc_request_task::rpc_request_task(message_ex* request, rpc_handler_ptr& h, service_node* node) : task(dsn_task_code_t(request->local_rpc_code), request->header->client.hash, node), _request(request), _handler(h) { dbg_dassert (TASK_TYPE_RPC_REQUEST == spec().type, "task type must be RPC_REQUEST, please use DEFINE_TASK_CODE_RPC to define the task code"); _request->add_ref(); // released in dctor }
message_ex* message_ex::create_receive_message(blob& data) { message_ex* msg = new message_ex(); msg->header = (message_header*)data.data(); msg->_is_read = true; data = data.range((int)sizeof(message_header)); msg->buffers.push_back(data); dbg_dassert(msg->header->body_length > 0, "message %s is empty!", msg->header->rpc_name); return msg; }
void tls_trans_mem_commit(size_t use_size) { dbg_dassert(tls_trans_memory.magic == 0xdeadbeef && !tls_trans_memory.committed && use_size <= tls_trans_memory.remain_bytes, "invalid use or parameter of tls_trans_mem_commit"); tls_trans_memory.next += use_size; tls_trans_memory.remain_bytes -= use_size; tls_trans_memory.committed = true; }
rpc_request_task::rpc_request_task(message_ex *request, rpc_request_handler &&h, service_node *node) : task(request->rpc_code(), request->header->client.thread_hash, node), _request(request), _handler(std::move(h)), _enqueue_ts_ns(0) { dbg_dassert( TASK_TYPE_RPC_REQUEST == spec().type, "%s is not a RPC_REQUEST task, please use DEFINE_TASK_CODE_RPC to define the task code", spec().name.c_str()); _request->add_ref(); // released in dctor }
void timer_task::exec() { _cb(_context); if (_interval_milliseconds > 0) { //_state must be TASK_STATE_RUNNING here dbg_dassert(_state.load(std::memory_order_relaxed) == TASK_STATE_RUNNING, "corrupted timer task state"); _state.store(TASK_STATE_READY, std::memory_order_release); set_delay(_interval_milliseconds); } }
rpc_response_task::rpc_response_task(message_ptr& request, int hash) : task(task_spec::get(request->header().local_rpc_code)->rpc_paired_code, hash == 0 ? request->header().client.hash : hash) { set_error_code(ERR_IO_PENDING); dbg_dassert (TASK_TYPE_RPC_RESPONSE == spec().type, "task must be of RPC_RESPONSE type, please use DEFINE_TASK_CODE_RPC to define the request task code"); _request = request; _caller_pool = task::get_current_worker() ? task::get_current_worker()->pool() : nullptr; }
rpc_request_task::rpc_request_task(message_ex* request, rpc_handler_info* h, service_node* node) : task(dsn_task_code_t(request->local_rpc_code), nullptr, [](void*) { dassert(false, "rpc request task cannot be cancelled"); }, static_cast<int>(request->header->client.hash), node), _request(request), _handler(h), _enqueue_ts_ns(0) { dbg_dassert (TASK_TYPE_RPC_REQUEST == spec().type, "%s is not a RPC_REQUEST task, please use DEFINE_TASK_CODE_RPC to define the task code", spec().name.c_str() ); _request->add_ref(); // released in dctor }
bool rpc_session::on_recv_message(message_ex* msg, int delay_ms) { if (msg->header->from_address.is_invalid()) msg->header->from_address = _remote_addr; msg->to_address = _net.address(); msg->io_session = this; if (msg->header->context.u.is_request) { // ATTENTION: need to check if self connection occurred. // // When we try to connect some socket in the same host, if we don't bind the client to a specific port, // operating system will provide ephemeral port for us. If it's happened to be the one we want to connect to, // it causes self connection. // // The case is: // - this session is a client session // - the remote address is in the same host // - the remote address is not listened, which means the remote port is not occupied // - operating system chooses the remote port as client's ephemeral port if (is_client() && msg->header->from_address == _net.engine()->primary_address()) { derror("self connection detected, address = %s", msg->header->from_address.to_string()); dassert(msg->get_count() == 0, "message should not be referenced by anybody so far"); delete msg; return false; } dbg_dassert(!is_client(), "only rpc server session can recv rpc requests"); _net.on_recv_request(msg, delay_ms); } // both rpc server session and rpc client session can receive rpc reply // specially, rpc client session can receive general rpc reply, // and rpc server session can receive forwarded rpc reply else { _matcher->on_recv_reply(&_net, msg->header->id, msg, delay_ms); } return true; }
void rpc_session::on_recv_message(message_ex* msg, int delay_ms) { msg->to_address = _net.address(); msg->io_session = this; if (msg->header->context.u.is_request) { dbg_dassert(!is_client(), "only rpc server session can recv rpc requests"); _net.on_recv_request(msg, delay_ms); } // both rpc server session and rpc client session can receive rpc reply // specially, rpc client session can receive general rpc reply, // and rpc server session can receive forwarded rpc reply else { _matcher->on_recv_reply(&_net, msg->header->id, msg, delay_ms); } }
void hpc_rpc_session::do_write(message_ex* msg) { static_assert (sizeof(dsn_message_parser::send_buf) == sizeof(struct iovec), "make sure they are compatible"); dbg_dassert(msg != nullptr, "cannot send empty msg"); // new msg if (_sending_msg == nullptr) { _sending_msg = msg; _sending_buffer_start_index = 0; } // continue old msg else { dassert(_sending_msg == msg, "only one sending msg is possible"); } // prepare send buffer, make sure header is already in the buffer while (true) { int buffer_count = (int)_sending_buffers.size() - _sending_buffer_start_index; struct msghdr hdr; memset((void*)&hdr, 0, sizeof(hdr)); hdr.msg_name = (void*)&_peer_addr; hdr.msg_namelen = (socklen_t)sizeof(_peer_addr); hdr.msg_iov = (struct iovec*)&_sending_buffers[_sending_buffer_start_index]; hdr.msg_iovlen = (size_t)buffer_count; int sz = sendmsg(_socket, &hdr, MSG_NOSIGNAL); int err = errno; dinfo("(s = %d) call sendmsg on %s:%hu, return %d, err = %s", _socket, _remote_addr.name(), _remote_addr.port(), sz, strerror(err) ); if (sz < 0) { if (err != EAGAIN && err != EWOULDBLOCK) { derror("(s = %d) sendmsg failed, err = %s", _socket, strerror(err)); on_failure(); } else { // wait for epoll_wait notification } return; } else { int len = (int)sz; int buf_i = _sending_buffer_start_index; while (len > 0) { auto& buf = _sending_buffers[buf_i]; if (len >= (int)buf.sz) { buf_i++; len -= (int)buf.sz; } else { buf.buf = (char*)buf.buf + len; buf.sz -= len; break; } } _sending_buffer_start_index = buf_i; // message completed, continue next message if (_sending_buffer_start_index == (int)_sending_buffers.size()) { dassert(len == 0, "buffer must be sent completely"); _sending_msg = nullptr; _send_lock.unlock(); // avoid recursion // try next msg recursively on_send_completed(msg); _send_lock.lock(); return; } else { // try next while(true) loop to continue sending current msg } } } }
void replication_app_client_base::call(request_context* request, bool no_delay) { auto& msg = request->callback_task->get_request(); auto nts = ::dsn::service::env::now_us(); if (nts + 100 >= msg->header().client.timeout_ts_us) // < 100us { message_ptr nil(nullptr); end_request(request, ERR_TIMEOUT, nil); delete request; return; } end_point addr; int app_id; error_code err = get_address( request->partition_index, !request->is_read, addr, app_id, request->read_header.semantic ); // target node in cache if (err == ERR_SUCCESS) { dbg_dassert(addr != end_point::INVALID, ""); dassert(app_id > 0, ""); if (request->header_pos != 0xffff) { if (request->is_read) { request->read_header.gpid.app_id = app_id; marshall(msg->writer(), request->read_header, request->header_pos); msg->header().client.hash = gpid_to_hash(request->read_header.gpid); } else { request->write_header.gpid.app_id = app_id; marshall(msg->writer(), request->write_header, request->header_pos); msg->header().client.hash = gpid_to_hash(request->write_header.gpid); } request->header_pos = 0xffff; } rpc::call( addr, msg, this, std::bind( &replication_app_client_base::replica_rw_reply, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, request ) ); } // target node not known else if (!no_delay) { // delay 1 second for further config query tasking::enqueue(LPC_REPLICATION_DELAY_QUERY_CONFIG, this, std::bind(&replication_app_client_base::call, this, request, true), 0, 1000 ); } else { zauto_lock l(_requests_lock); // init timeout timer if necessary if (request->timeout_timer == nullptr) { request->timeout_timer = tasking::enqueue( LPC_REPLICATION_CLIENT_REQUEST_TIMEOUT, this, std::bind(&replication_app_client_base::on_user_request_timeout, this, request), 0, static_cast<int>((msg->header().client.timeout_ts_us - nts) / 1000) ); } // put into pending queue of querying target partition auto it = _pending_requests.find(request->partition_index); if (it == _pending_requests.end()) { auto pc = new partition_context; pc->query_config_task = nullptr; it = _pending_requests.insert(pending_requests::value_type(request->partition_index, pc)).first; } it->second->requests.push_back(request); // init configuration query task if necessary if (it->second->query_config_task == nullptr) { message_ptr msg = message::create_request(RPC_CM_CALL); meta_request_header hdr; hdr.rpc_tag = RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX; marshall(msg->writer(), hdr); configuration_query_by_index_request req; req.app_name = _app_name; req.partition_indices.push_back(request->partition_index); marshall(msg->writer(), req); it->second->query_config_task = rpc::call_replicated( _last_contact_point, _meta_servers, msg, this, std::bind(&replication_app_client_base::query_partition_configuration_reply, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, request->partition_index ) ); } } }
void message_ex::seal(bool fill_crc) { dassert (!_is_read && _rw_committed, "seal can only be applied to write mode messages"); dbg_dassert(header->body_length > 0, "message %s is empty!", header->rpc_name); if (fill_crc) { // compute data crc if necessary if (header->body_crc32 == CRC_INVALID) { int i_max = (int)buffers.size() - 1; uint32_t crc32 = 0; size_t len = 0; for (int i = 0; i <= i_max; i++) { uint32_t lcrc; const void* ptr; size_t sz; if (i == 0) { ptr = (const void*)(buffers[i].data() + sizeof(message_header)); sz = (size_t)buffers[i].length() - sizeof(message_header); } else { ptr = (const void*)buffers[i].data(); sz = (size_t)buffers[i].length(); } lcrc = dsn_crc32_compute(ptr, sz, crc32); crc32 = dsn_crc32_concatenate( 0, 0, crc32, len, crc32, lcrc, sz ); len += sz; } dassert (len == (size_t)header->body_length, "data length is wrong"); header->body_crc32 = crc32; } header->hdr_crc32 = CRC_INVALID; header->hdr_crc32 = dsn_crc32_compute(header, sizeof(message_header), 0); } else { #ifdef _DEBUG int i_max = (int)buffers.size() - 1; size_t len = 0; for (int i = 0; i <= i_max; i++) { len += (size_t)buffers[i].length(); } dassert(len == (size_t)header->body_length + sizeof(message_header), "data length is wrong"); #endif } }