/**
 * Builds a comm server on top of the nanosockets transports.
 *
 * Three sockets are created, in this order:
 *   1. the object socket, constructed with \p alternate_bind_address; if that
 *      string is empty it is replaced by the address the socket reports via
 *      get_bound_address() and used as the base address from then on;
 *   2. the control socket, constructed with \p alternate_control_address, or
 *      with generate_aux_address(base, "_control") when that string is empty;
 *   3. the status publish socket, constructed with
 *      \p alternate_publish_address, or with
 *      generate_aux_address(base, "_status") when that string is empty.
 *
 * Both reply sockets dispatch to comm_server::callback. The running-command id
 * and the cancel-checked bit are reset, the object factory is registered by
 * hand as object 0, and lcg_seed is initialised from std::random_device.
 *
 * \param zkhosts                   Not used by this constructor.
 * \param name                      Not used by this constructor.
 * \param alternate_bind_address    Address for the object socket (may be empty).
 * \param alternate_control_address Address for the control socket (may be empty).
 * \param alternate_publish_address Address for the publish socket (may be empty).
 * \param secret_key                Not used by this constructor.
 */
comm_server::comm_server(std::vector<std::string> zkhosts,
                         std::string name,
                         std::string alternate_bind_address,
                         std::string alternate_control_address,
                         std::string alternate_publish_address,
                         std::string secret_key)
    : started(false),
      comm_server_debug_mode(std::getenv("GRAPHLAB_COMM_SERVER_DEBUG_MODE") != nullptr) {
  // NOTE(review): the original comment here read "2 threads. one to handle
  // pings, one to handle real messages" while the argument passed is 1.
  // Presumably 1 is a per-socket thread count and the two threads are those of
  // the object and control sockets -- confirm against async_reply_socket.
  object_socket = new nanosockets::async_reply_socket(
      boost::bind(&comm_server::callback, this, _1, _2),
      1,
      alternate_bind_address);

  if (alternate_bind_address.empty()) {
    alternate_bind_address = object_socket->get_bound_address();
  }
  logstream(LOG_INFO) << "my alt bind address: " << alternate_bind_address << std::endl;

  // Auxiliary addresses are derived from the base address unless the caller
  // supplied explicit ones.
  const std::string control_address =
      alternate_control_address.empty()
          ? generate_aux_address(alternate_bind_address, "_control")
          : alternate_control_address;
  control_socket = new nanosockets::async_reply_socket(
      boost::bind(&comm_server::callback, this, _1, _2),
      1,
      control_address);

  const std::string publish_address =
      alternate_publish_address.empty()
          ? generate_aux_address(alternate_bind_address, "_status")
          : alternate_publish_address;
  publishsock = new nanosockets::publish_socket(publish_address);

  get_srv_running_command().store(0);
  get_cancel_bit_checked().store(false);

  logstream(LOG_EMPH) << "Server listening on: "
                      << object_socket->get_bound_address() << std::endl;
  logstream(LOG_INFO) << "Server Control listening on: "
                      << control_socket->get_bound_address() << std::endl;
  logstream(LOG_INFO) << "Server status published on: "
                      << publishsock->get_bound_address() << std::endl;

  // There is a chicken and egg problem here: the object factory cannot be used
  // to create the object factory. So it is constructed and registered by hand.
  object_factory = new object_factory_impl(*this);
  // The factory lambda is stored by register_type, so capture `this`
  // explicitly rather than through a blanket by-reference capture.
  register_type<object_factory_base>([this]() {
    return new object_factory_impl(*this);
  });
  // Unary plus turns the captureless lambda into a plain function pointer.
  auto deleter = +[](void* v) {
    if (v != nullptr) {
      delete reinterpret_cast<object_factory_impl*>(v);
    }
  };
  std::shared_ptr<void> object_ptr(object_factory, deleter);
  registered_objects.insert({0, object_ptr});

  // Combine two random_device draws into one seed. The shift is done in a
  // 64-bit type: shifting a 32-bit size_t left by 32 is undefined behaviour.
  std::random_device rd;
  const uint64_t seed_high = rd();
  const uint64_t seed_low = rd();
  lcg_seed = (seed_high << 32) + seed_low;
}
void comm_server::stop() { log_func_entry(); if (started) { started = false; } // Attempt to cancel any currently running command get_srv_running_command().store((unsigned long long)uint64_t(-1)); }
std::string object_factory_impl::ping(std::string pingval) { unsigned long long cancel_id = 0; if(boost::starts_with(pingval, "ctrlc")) { std::string cancel_str = pingval.substr(5, pingval.length() - 5); cancel_id = std::stoull(cancel_str); } if(cancel_id != 0) { // If the cancelled command matches the currently running one, change // this value to uint64_t(-1) to show that we must cancel. bool ret = get_srv_running_command().compare_exchange_strong(cancel_id, (unsigned long long)uint64_t(-1)); if(ret) { logstream(LOG_DEBUG) << "Cancelling command " << cancel_id << std::endl; } } return pingval; }
bool comm_server::callback(nanosockets::zmq_msg_vector& recv, nanosockets::zmq_msg_vector& reply) { // construct a call message from the received block call_message call; reply_message rep; bool success = call.construct(recv); if (!success) { rep.copy_body_from("Invalid Message"); rep.status = reply_status::BAD_MESSAGE; rep.emit(reply); return true; } // find the object ID { boost::lock_guard<boost::mutex> guard(registered_object_lock); if (registered_objects.count(call.objectid) == 0) { std::string ret = "No such object " + std::to_string(call.objectid); logstream(LOG_ERROR) << ret << std::endl; rep.copy_body_from(ret); rep.status = reply_status::NO_OBJECT; rep.emit(reply); return true; } } // // find the function if (dispatch_map.count(call.function_name) == 0) { std::string ret = "No such function " + call.function_name; logstream(LOG_ERROR) << ret << std::endl; rep.copy_body_from(ret); rep.status = reply_status::NO_FUNCTION; rep.emit(reply); return true; } std::string trimmed_function_name; // trim the function call printing to stop at the first space std::copy(call.function_name.begin(), std::find(call.function_name.begin(), call.function_name.end(), ' '), std::inserter(trimmed_function_name, trimmed_function_name.end())); std::string message = "Calling object " + std::to_string(call.objectid) + " function: " + trimmed_function_name; if(comm_server_debug_mode) { logstream(LOG_DEBUG) << message << std::endl; } /* * if (trimmed_function_name == "object_factory_base::ping" || call.objectid == 0) { * logstream(LOG_DEBUG) << message << "\n"; * } else { * logstream(LOG_INFO) << message << "\n"; * } */ report_status(STATUS_COMM_SERVER_INFO, message); // ok we are good to go // create the appropriate archives graphlab::iarchive iarc(call.body, call.bodylen); graphlab::oarchive oarc; // Now set the currently running command if this is a real command (not a ping) auto ret = call.properties.find(std::string("command_id")); bool real_command = false; if(ret != 
call.properties.end()) { unsigned long long ul = std::stoull(ret->second); get_srv_running_command().store(ul); real_command = true; } rep.status = reply_status::OK; try { dispatch_map[call.function_name]->execute( registered_objects[call.objectid].get(), this, iarc, oarc); } catch (const std::ios_base::failure& e) { // IO Exception rep.copy_body_from(e.what()); report_status(STATUS_COMM_SERVER_ERROR, e.what()); rep.status = reply_status::IO_ERROR; } catch (std::bad_alloc& e) { // MEMORY Exception rep.copy_body_from(e.what()); report_status(STATUS_COMM_SERVER_ERROR, e.what()); rep.status = reply_status::MEMORY_ERROR; } catch (std::out_of_range& e) { // INDEX Exception rep.copy_body_from(e.what()); report_status(STATUS_COMM_SERVER_ERROR, e.what()); rep.status = reply_status::INDEX_ERROR; } catch (std::bad_cast& e) { // TYPE Exception rep.copy_body_from(e.what()); report_status(STATUS_COMM_SERVER_ERROR, e.what()); rep.status = reply_status::TYPE_ERROR; } catch (std::string& s) { // General Exception rep.copy_body_from(s); report_status(STATUS_COMM_SERVER_ERROR, s); rep.status = reply_status::EXCEPTION; } catch (const char* s) { rep.copy_body_from(s); report_status(STATUS_COMM_SERVER_ERROR, s); rep.status = reply_status::EXCEPTION; } catch (std::exception& e) { rep.copy_body_from(e.what()); report_status(STATUS_COMM_SERVER_ERROR, e.what()); rep.status = reply_status::EXCEPTION; } catch (...) { rep.copy_body_from("Unknown Runtime Exception"); report_status(STATUS_COMM_SERVER_ERROR, "Unknown Runtime Exception"); rep.status = reply_status::EXCEPTION; } /* * Complete hack. * For whatever reason zeromq's zmq_msg_send and zmq_msg_recv function * return the size of the mesage sent in an int. Even though the message * size can be size_t. * Also, zmq_msg_send/zmq_msg_recv use "-1" return for failure, thus * bringing up the issue of integer overflow just "coincidentally" hitting * -1 and thus failing terribly terribly. * Solution is simple. Pad the buffer to even. 
*/ if (oarc.off & 1) oarc.write(" ", 1); report_status(STATUS_COMM_SERVER_INFO, "Function Execution Success"); if (rep.status == reply_status::OK) { rep.body = oarc.buf; rep.bodylen = oarc.off; } // Command is now over, so this is not the running command anymore if(real_command) { std::atomic<bool> &cancel_checked = get_cancel_bit_checked(); if(cancel_checked.load()) { if(must_cancel()) { rep.properties.insert( std::make_pair(std::string("cancel"), std::string("true"))); } else { rep.properties.insert( std::make_pair(std::string("cancel"), std::string("false"))); } } get_srv_running_command().store(0); cancel_checked.store(false); } rep.emit(reply); return true; }
/**
 * Builds a comm server on top of the libfault transports, optionally
 * registering its sockets in zookeeper.
 *
 * A zookeeper key_value (prefix "cppipc", name \p name) is created only when
 * \p zkhosts is non-empty; otherwise keyval is null. Three sockets are then
 * created, in this order:
 *   1. the object socket, constructed with \p alternate_bind_address and
 *      \p secret_key; if the bind address is empty it is replaced by the
 *      address the socket reports via get_bound_address();
 *   2. the control socket;
 *   3. the status publish socket.
 * For the control and publish sockets, when zookeeper is not used and no
 * explicit address was given, an address is derived from the base address
 * with generate_aux_address ("_control" / "_status").
 *
 * When zookeeper is used, the keys "call", "control" and "status" are
 * registered for the three sockets. Both reply sockets dispatch to
 * comm_server::callback and are added to a freshly created pollset. The
 * object factory is registered by hand as object 0.
 *
 * \param zkhosts                   Zookeeper hosts; empty disables zookeeper.
 * \param name                      Server name used for the zookeeper key_value.
 * \param alternate_bind_address    Address for the object socket (may be empty).
 * \param alternate_control_address Address for the control socket (may be empty).
 * \param alternate_publish_address Address for the publish socket (may be empty).
 * \param secret_key                Passed to the object socket.
 * \throws const char* if any of the zookeeper key registrations fails.
 */
comm_server::comm_server(std::vector<std::string> zkhosts,
                         std::string name,
                         std::string alternate_bind_address,
                         std::string alternate_control_address,
                         std::string alternate_publish_address,
                         std::string secret_key)
    : started(false),
      zmq_ctx(zmq_ctx_new()),
      // make a keyval only if zkhosts is not empty, null otherwise
      keyval(zkhosts.empty()
                 ? nullptr
                 : new graphlab::zookeeper_util::key_value(zkhosts, "cppipc", name)),
      comm_server_debug_mode(std::getenv("GRAPHLAB_COMM_SERVER_DEBUG_MODE") != nullptr) {
  // NOTE(review): the original comment here read "2 threads. one to handle
  // pings, one to handle real messages" while the argument passed is 1.
  // Presumably 1 is a per-socket thread count and the two threads are those of
  // the object and control sockets -- confirm against async_reply_socket.
  object_socket = new libfault::async_reply_socket(
      zmq_ctx, keyval,
      boost::bind(&comm_server::callback, this, _1, _2),
      1,
      alternate_bind_address,
      secret_key);

  if (alternate_bind_address.empty()) {
    alternate_bind_address = object_socket->get_bound_address();
  }
  logstream(LOG_INFO) << "my alt bind address: " << alternate_bind_address << std::endl;

  // If zookeeper is not used and no explicit address was provided, derive the
  // auxiliary addresses from the bind address.
  const bool without_zookeeper = (keyval == nullptr);

  const std::string control_address =
      (without_zookeeper && alternate_control_address.empty())
          ? generate_aux_address(alternate_bind_address, "_control")
          : alternate_control_address;
  control_socket = new libfault::async_reply_socket(
      zmq_ctx, keyval,
      boost::bind(&comm_server::callback, this, _1, _2),
      1,
      control_address);

  const std::string publish_address =
      (without_zookeeper && alternate_publish_address.empty())
          ? generate_aux_address(alternate_bind_address, "_status")
          : alternate_publish_address;
  publishsock = new libfault::publish_socket(zmq_ctx, keyval, publish_address);

  get_srv_running_command().store(0);
  get_cancel_bit_checked().store(false);

  pollset = new libfault::socket_receive_pollset;

  // NOTE(review): the throws below leave the context, keyval, sockets and
  // pollset allocated, since a throwing constructor does not run the
  // destructor. The correct teardown order is not visible here, so cleanup is
  // left for a follow-up -- confirm against the destructor.
  if (keyval != nullptr) {
    if (!object_socket->register_key("call")) {
      // Terminate the message with std::endl like every other log statement,
      // so it is emitted before the exception propagates.
      logstream(LOG_ERROR) << "Unable to register the zookeeper key for the main server. "
                              "Perhaps there is already a server with this name?"
                           << std::endl;
      throw "Unable to register with zookeeper";
    }
    if (!control_socket->register_key("control")) {
      logstream(LOG_ERROR) << "Unable to register the zookeeper key for the main server's control socket. "
                              "Perhaps there is already a server with this name?"
                           << std::endl;
      throw "Unable to register with zookeeper";
    }
    if (!publishsock->register_key("status")) {
      logstream(LOG_ERROR) << "Unable to register the zookeeper key for the publishsock. "
                              "Perhaps there is already a server with this name?"
                           << std::endl;
      throw "Unable to register with zookeeper";
    }
  }

  object_socket->add_to_pollset(pollset);
  control_socket->add_to_pollset(pollset);

  logstream(LOG_EMPH) << "Server listening on: "
                      << object_socket->get_bound_address() << std::endl;
  logstream(LOG_INFO) << "Server Control listening on: "
                      << control_socket->get_bound_address() << std::endl;
  logstream(LOG_INFO) << "Server status published on: "
                      << publishsock->get_bound_address() << std::endl;

  // There is a chicken and egg problem here: the object factory cannot be used
  // to create the object factory. So it is constructed and registered by hand.
  object_factory = new object_factory_impl(*this);
  // The factory lambda is stored by register_type, so capture `this`
  // explicitly rather than through a blanket by-reference capture.
  register_type<object_factory_base>([this]() {
    return new object_factory_impl(*this);
  });
  // Unary plus turns the captureless lambda into a plain function pointer.
  auto deleter = +[](void* v) {
    if (v != nullptr) {
      delete reinterpret_cast<object_factory_impl*>(v);
    }
  };
  std::shared_ptr<void> object_ptr(object_factory, deleter);
  registered_objects.insert({0, object_ptr});

  // Combine two random_device draws into one seed. The shift is done in a
  // 64-bit type: shifting a 32-bit size_t left by 32 is undefined behaviour.
  std::random_device rd;
  const uint64_t seed_high = rd();
  const uint64_t seed_low = rd();
  lcg_seed = (seed_high << 32) + seed_low;
}