/*
 * print_title_opt - print a title line unless there is nothing to add to it
 *
 * @main_title:  printf-style format string; it is handed two string
 *               arguments, in order: the result of for_dest() and
 *               @extra_title
 * @extra_title: extra text for the title; must not be NULL because its
 *               first character is inspected
 *
 * Nothing is printed when the destination is the local node and
 * @extra_title is the empty string.
 */
static void print_title_opt(const char *main_title, const char *extra_title)
{
	const int is_local = (dest == own_node());

	/* extra_title is only inspected when the destination is local */
	if (!is_local || *extra_title != '\0')
		printf(main_title, for_dest(), extra_title);
}
/*
 * for_dest - build the " for node <addr>" suffix used in titles
 *
 * Returns an empty string when the destination is the local node.
 * Otherwise returns a pointer to a function-local static buffer holding
 * " for node " followed by addr2str(dest); the buffer is overwritten by
 * the next call, so the caller must use or copy the result before calling
 * again.
 */
static const char *for_dest(void)
{
	static char addr_area[ADDR_AREA];

	if (dest == own_node())
		return "";

	/*
	 * Bounded write: the text is truncated (and still NUL-terminated)
	 * instead of overrunning addr_area if the address string is longer
	 * than the buffer allows.
	 */
	snprintf(addr_area, sizeof(addr_area), " for node %s", addr2str(dest));
	return addr_area;
}
void Vivaldi::Run() { assert(dir_client_.get() != NULL); assert(osd_client_.get() != NULL); // Initialized to (0,0) by default my_vivaldi_coordinates_.set_local_error(0.0); my_vivaldi_coordinates_.set_x_coordinate(0.0); my_vivaldi_coordinates_.set_y_coordinate(0.0); ifstream vivaldi_coordinates_file(vivaldi_options_.vivaldi_filename.c_str()); if (vivaldi_coordinates_file.is_open()) { my_vivaldi_coordinates_.ParseFromIstream(&vivaldi_coordinates_file); if (!my_vivaldi_coordinates_.IsInitialized()) { Logging::log->getLog(LEVEL_ERROR) << "Vivaldi: Could not load coordinates from file: " << my_vivaldi_coordinates_.InitializationErrorString() << endl; my_vivaldi_coordinates_.Clear(); } } else { if (Logging::log->loggingActive(LEVEL_INFO)) { Logging::log->getLog(LEVEL_INFO) << "Vivaldi: Coordinates file does not exist," << "starting with empty coordinates." << endl << "Initialization might take some time." << endl; } } vivaldi_coordinates_file.close(); VivaldiNode own_node(my_vivaldi_coordinates_); uint64_t vivaldi_iterations = 0; list<KnownOSD> known_osds; bool valid_known_osds = false; vector<uint64_t> current_retries; int retries_in_a_row = 0; list<KnownOSD>::iterator chosen_osd_service; ZipfGenerator rank_generator(vivaldi_options_.vivaldi_zipf_generator_skew); for (;;) { boost::scoped_ptr<rpc::SyncCallbackBase> ping_response; try { // Get a list of OSDs from the DIR(s) if ((vivaldi_iterations % vivaldi_options_.vivaldi_max_iterations_before_updating) == 0) { valid_known_osds = UpdateKnownOSDs(&known_osds, own_node); if (valid_known_osds && !known_osds.empty()) { rank_generator.set_size(known_osds.size()); } // The pending retries are discarded, because the old OSDs might not // be in the new list current_retries.clear(); retries_in_a_row = 0; chosen_osd_service = known_osds.begin(); } // There are known OSDs, ping one of them. 
if (valid_known_osds && !known_osds.empty()) { // Choose an OSD, only if there's no pending retry if (retries_in_a_row == 0) { int index = rank_generator.next(); list<KnownOSD>::iterator known_iterator = known_osds.begin(); for (int i = 0; (i < index) && (known_iterator != known_osds.end()); known_iterator++, i++) { // Move the iterator over the chosen service } chosen_osd_service = known_iterator; } // Ping chosen OSD. xtreemfs_pingMesssage ping_message; ping_message.set_request_response(true); ping_message.mutable_coordinates() ->MergeFrom(*own_node.GetCoordinates()); VivaldiCoordinates* random_osd_vivaldi_coordinates; if (Logging::log->loggingActive(LEVEL_DEBUG)) { Logging::log->getLog(LEVEL_DEBUG) << "Vivaldi: recalculating against: " << chosen_osd_service->GetUUID() << endl; } SimpleUUIDIterator pinged_osd; pinged_osd.AddUUID(chosen_osd_service->GetUUID()); // start timing boost::posix_time::ptime start_time(boost::posix_time ::microsec_clock::local_time()); // execute sync ping ping_response.reset( ExecuteSyncRequest( boost::bind( &xtreemfs::pbrpc::OSDServiceClient::xtreemfs_ping_sync, osd_client_.get(), _1, boost::cref(auth_bogus_), boost::cref(user_credentials_bogus_), &ping_message), &pinged_osd, uuid_resolver_, RPCOptionsFromOptions(vivaldi_options_))); // stop timing boost::posix_time::ptime end_time( boost::posix_time::microsec_clock::local_time()); boost::posix_time::time_duration rtt = end_time - start_time; uint64_t measured_rtt = rtt.total_milliseconds(); xtreemfs::pbrpc::xtreemfs_pingMesssage* ping_response_obj = static_cast<xtreemfs::pbrpc::xtreemfs_pingMesssage*>( ping_response->response()); random_osd_vivaldi_coordinates = ping_response_obj->mutable_coordinates(); if (Logging::log->loggingActive(LEVEL_DEBUG)) { Logging::log->getLog(LEVEL_DEBUG) << "Vivaldi: ping response received. 
Measured time: " << measured_rtt << " ms" << endl; } // Recalculate coordinates here if (retries_in_a_row < vivaldi_options_.vivaldi_max_request_retries) { if (!own_node.RecalculatePosition(*random_osd_vivaldi_coordinates, measured_rtt, false)) { // The movement has been postponed because the measured RTT // seems to be a peak current_retries.push_back(measured_rtt); retries_in_a_row++; } else { // The movement has been accepted current_retries.clear(); retries_in_a_row = 0; } } else { // Choose the lowest RTT uint64_t lowest_rtt = measured_rtt; for (vector<uint64_t>::iterator retries_iterator = current_retries.begin(); retries_iterator < current_retries.end(); ++retries_iterator) { if (*retries_iterator < lowest_rtt) { lowest_rtt = *retries_iterator; } } // Force recalculation after too many retries own_node.RecalculatePosition(*random_osd_vivaldi_coordinates, lowest_rtt, true); current_retries.clear(); retries_in_a_row = 0; // set measured_rtt to the actually used one for trace output measured_rtt = lowest_rtt; } // update local coordinate copy here { boost::mutex::scoped_lock lock(coordinate_mutex_); my_vivaldi_coordinates_.CopyFrom(*own_node.GetCoordinates()); } // Store the new coordinates in a local file if (Logging::log->loggingActive(LEVEL_DEBUG)) { Logging::log->getLog(LEVEL_DEBUG) << "Vivaldi: storing coordinates to file: (" << own_node.GetCoordinates()->x_coordinate() << ", " << own_node.GetCoordinates()->y_coordinate() << ")" << endl; } ofstream file_out(vivaldi_options_.vivaldi_filename.c_str(), ios_base::binary | ios_base::trunc); own_node.GetCoordinates()->SerializePartialToOstream(&file_out); file_out.close(); // Update client coordinates at the DIR if (vivaldi_options_.vivaldi_enable_dir_updates) { if (Logging::log->loggingActive(LEVEL_DEBUG)) { Logging::log->getLog(LEVEL_DEBUG) << "Vivaldi: Sending coordinates to DIR." 
<< endl; } boost::scoped_ptr<rpc::SyncCallbackBase> response; try { response.reset( ExecuteSyncRequest( boost::bind( &xtreemfs::pbrpc::DIRServiceClient ::xtreemfs_vivaldi_client_update_sync, dir_client_.get(), _1, boost::cref(auth_bogus_), boost::cref(user_credentials_bogus_), own_node.GetCoordinates()), dir_service_addresses_.get(), NULL, RPCOptionsFromOptions(vivaldi_options_), true)); response->DeleteBuffers(); } catch (const XtreemFSException& e) { if (response.get()) { response->DeleteBuffers(); } if (Logging::log->loggingActive(LEVEL_INFO)) { Logging::log->getLog(LEVEL_INFO) << "Vivaldi: Failed to send the updated client" " coordinates to the DIR, error: " << e.what() << endl; } } } // //Print a trace // char auxStr[256]; // SPRINTF_VIV(auxStr, // 256, // "%s:%lld(Viv:%.3f) Own:(%.3f,%.3f) lE=%.3f " // "Rem:(%.3f,%.3f) rE=%.3f %s\n", // retried ? "RETRY" : "RTT", // static_cast<long long int> (measured_rtt), // own_node.calculateDistance( // (*own_node.getCoordinates()), // random_osd_vivaldi_coordinates.get()), // own_node.getCoordinates()->x_coordinate(), // own_node.getCoordinates()->y_coordinate(), // own_node.getCoordinates()->local_error(), // random_osd_vivaldi_coordinates->x_coordinate(), // random_osd_vivaldi_coordinates->y_coordinate(), // random_osd_vivaldi_coordinates->local_error(), // chosen_osd_service->get_uuid().data()); // get_log()->getStream(YIELD::platform::Log::LOG_INFO) << // "Vivaldi: " << auxStr; // Update OSD's coordinates chosen_osd_service->SetCoordinates(*random_osd_vivaldi_coordinates); // Re-sort known_osds // TODO(mno): Use a more efficient sort approach. 
list<KnownOSD> aux_osd_list(known_osds); KnownOSD chosen_osd_service_value = *chosen_osd_service; known_osds.clear(); // NOTE: this invalidates all ptrs and itrs for (list<KnownOSD>::reverse_iterator aux_iterator = aux_osd_list.rbegin(); aux_iterator != aux_osd_list.rend(); aux_iterator++) { double new_osd_distance = own_node.CalculateDistance( *(aux_iterator->GetCoordinates()), *own_node.GetCoordinates()); list<KnownOSD>::iterator known_iterator = known_osds.begin(); while (known_iterator != known_osds.end()) { double old_osd_distance = \ own_node.CalculateDistance( *(known_iterator->GetCoordinates()), *own_node.GetCoordinates()); if (old_osd_distance >= new_osd_distance) { known_osds.insert(known_iterator, (*aux_iterator)); break; } else { known_iterator++; } } if (known_iterator == known_osds.end()) { known_osds.push_back((*aux_iterator)); } } // end re-sorting // find the chosen OSD in the resorted list chosen_osd_service = find(known_osds.begin(), known_osds.end(), chosen_osd_service_value); assert(chosen_osd_service != known_osds.end()); ping_response->DeleteBuffers(); } else { if (Logging::log->loggingActive(LEVEL_WARN)) { Logging::log->getLog(LEVEL_WARN) << "Vivaldi: no OSD available." << endl; } } vivaldi_iterations = (vivaldi_iterations + 1) % LONG_MAX; // Sleep until the next iteration uint32_t sleep_in_s = static_cast<uint32_t>( vivaldi_options_.vivaldi_recalculation_interval_s - vivaldi_options_.vivaldi_recalculation_epsilon_s + (static_cast<double>(rand()) / (RAND_MAX - 1)) * 2.0 * vivaldi_options_.vivaldi_recalculation_epsilon_s); if (Logging::log->loggingActive(LEVEL_DEBUG)) { Logging::log->getLog(LEVEL_DEBUG) << "Vivaldi: sleeping during " << sleep_in_s << " s." 
<< endl; } boost::this_thread::sleep(boost::posix_time::seconds(sleep_in_s)); } catch (const XtreemFSException& e) { if (ping_response.get()) { ping_response->DeleteBuffers(); } Logging::log->getLog(LEVEL_ERROR) << "Vivaldi: could not ping OSDs: " << e.what() << endl; // We must avoid to keep retrying indefinitely against an OSD which is not // responding if (retries_in_a_row > 0 && (++retries_in_a_row >= vivaldi_options_.vivaldi_max_request_retries)) { // If the last retry times out all the previous retries are discarded current_retries.clear(); retries_in_a_row = 0; } } catch(const boost::thread_interrupted&) { if (ping_response.get()) { ping_response->DeleteBuffers(); } break; } } } // Run()
int main(int argc, char *argv[], char *dummy[]) { ushort tcp_port = 4711; struct srv_info sinfo; uint cmd; uint max_msglen; struct sockaddr_in srv_addr; int lstn_sd, peer_sd; int srv_id = 0, srv_cnt = 0;; own_node_addr = own_node(); memset(&sinfo, 0, sizeof(sinfo)); if (signal(SIGALRM, sig_alarm) == SIG_ERR) die("Server master: can't catch alarm signals\n"); printf("****** TIPC Benchmark Server Started ******\n"); /* Create socket for communication with master: */ reset: master_sd = socket(AF_TIPC, SOCK_RDM, 0); if (master_sd < 0) die("Server: Can't create socket to master\n"); if (bind(master_sd, (struct sockaddr *)&srv_ctrl_addr, sizeof(srv_ctrl_addr))) die("Server: Failed to bind to master socket\n"); /* Wait for command from master: */ srv_from_master(&cmd, &max_msglen, 0, 0); buf = malloc(max_msglen); if (!buf) die("Failed to create buffer of size %u\n", ntohl(max_msglen)); /* Create TIPC or TCP listening socket: */ if (cmd == TIPC_CONN) { lstn_sd = socket (AF_TIPC, SOCK_STREAM,0); if (lstn_sd < 0) die("Server master: can't create listening socket\n"); if (bind(lstn_sd, (struct sockaddr *)&srv_lstn_addr, sizeof(srv_lstn_addr)) < 0) die("TIPC Server master: failed to bind port name\n"); printf("****** TIPC Listener Socket Created ******\n"); srv_to_master(SRV_INFO, 0); close(master_sd); } else if (cmd == TCP_CONN) { if ((lstn_sd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) die("TCP Server: failed to create listener socket"); /* Construct listener address structure */ memset(&srv_addr, 0, sizeof(srv_addr)); srv_addr.sin_family = AF_INET; srv_addr.sin_addr.s_addr = htonl(INADDR_ANY); srv_addr.sin_port = htons(tcp_port); /* Bind socket to address */ while (0 > bind(lstn_sd, (struct sockaddr *) &srv_addr, sizeof(srv_addr))) srv_addr.sin_port = htons(++tcp_port); /* Inform master about own IP addresses and listener port number */ get_ip_list(&sinfo, NULL); sinfo.tcp_port = htons(tcp_port); printf("****** TCP Listener Socket Created ******\n"); 
srv_to_master(SRV_INFO, &sinfo); close(master_sd); } else { close(master_sd); goto reset; } /* Listen for incoming connections */ if (listen(lstn_sd, 32) < 0) die("Server: listen() failed"); while (1) { if (waitpid(-1, NULL, WNOHANG) > 0) { if (--srv_cnt) continue; close(lstn_sd); printf("****** Listener Socket Deleted ******\n"); goto reset; } peer_sd = wait_for_connection(lstn_sd); if (!peer_sd) continue; srv_id++; srv_cnt++; if (fork()) { close(peer_sd); continue; } /* Continue in child process */ close(lstn_sd); dprintf("calling echo: peer_sd: %u, srv_cnt = %u\n",peer_sd, srv_cnt); master_sd = socket(AF_TIPC, SOCK_RDM, 0); if (master_sd < 0) die("Server: Can't create socket to master\n"); if (bind(master_sd, (struct sockaddr *)&srv_ctrl_addr, sizeof(srv_ctrl_addr))) die("Server: Failed to bind to master socket\n"); echo_messages(peer_sd, master_sd, srv_id); } close(lstn_sd); printf("****** TIPC Benchmark Server Finished ******\n"); exit(0); return 0; }
int main(int argc, char *argv[], char *dummy[]) { int c; int l_mult = 1; int t_mult = 1; int req_clients = DEFAULT_CLIENTS; __u32 server_node; int client_master_sd; struct sockaddr_tipc dest_addr; pid_t child_pid; unsigned long long num_clients; unsigned int client_id; unsigned int server_num; struct client_cmd cmd = {0,0,0,0,0}; int wait_err; /* Process command line arguments */ while ((c = getopt(argc, argv, "n:t:l:h")) != -1) { switch (c) { case 'l': l_mult = atoi(optarg); if (l_mult < 0) { fprintf(stderr, "Invalid latency multiplier [%d]\n", l_mult); exit(1); } break; case 't': t_mult = atoi(optarg); if (t_mult < 0) { fprintf(stderr, "Invalid throughput multiplier [%d]\n", t_mult); exit(1); } break; case 'n': req_clients = atoi(optarg); if (req_clients <= 0) { fprintf(stderr, "Invalid number of clients " "[%d]\n", req_clients); exit(1); } break; default: usage(argv[0]); return 1; } } /* Wait for benchmark server to appear */ server_node = wait_for_server(SERVER_NAME, 0, MAX_DELAY); /* Create socket used to communicate with child clients */ client_master_sd = socket(AF_TIPC, SOCK_RDM, 0); if (client_master_sd < 0) { printf("Client master: Can't create main client socket\n"); perror(NULL); exit(1); } dest_addr.family = AF_TIPC; dest_addr.addrtype = TIPC_ADDR_NAME; dest_addr.scope = TIPC_NODE_SCOPE; dest_addr.addr.name.name.type = MCLIENT_NAME; dest_addr.addr.name.name.instance = 0; if (bind(client_master_sd, (struct sockaddr *)&dest_addr, sizeof(dest_addr))) { printf("Client master: Failed to bind\n"); perror(NULL); exit(1); } printf("****** TIPC benchmark client started ******\n"); num_clients = 0; dest_addr.addr.name.name.type = CLIENT_NAME; dest_addr.addr.name.domain = 0; /* Optionally run latency test */ if (!l_mult) goto end_latency; printf("Client master: Starting Latency Benchmark\n"); /* Create first child client */ fflush(stdout); child_pid = fork(); if (child_pid < 0) { printf ("Client master: fork failed\n"); perror(NULL); exit(1); } num_clients++; if 
(!child_pid) { close(client_master_sd); clientmain(num_clients); /* Note: child client never returns */ } dprintf("Client master: waiting for confirmation from client 1\n"); wait_err = wait_for_msg(client_master_sd); if (wait_err) { printf("Client master: no confirmation from client 1 (err=%u)\n", wait_err); exit(1); } if (recv(client_master_sd, buf, 4, 0) != 4) { printf ("Client master: confirmation failure from client 1\n"); perror(NULL); exit(1); } server_num = *(unsigned int *)buf; dprintf("Client master: client 1 linked to server %i\n", server_num); /* Run latency test */ cmd.msg_size = 64; cmd.msg_count = 10240 * l_mult; cmd.burst_size = cmd.msg_count; while (cmd.msg_size <= TIPC_MAX_USER_MSG_SIZE) { struct client_cmd ccmd; int sz; struct timeval start_time; unsigned long long elapsed; printf("Exchanging %llu messages of size %llu octets (burst size %llu)\n", cmd.msg_count, cmd.msg_size, cmd.burst_size); dprintf(" client 1 <--> server %d\n", server_num); cmd.client_no = 1; dest_addr.addr.name.name.instance = 1; gettimeofday(&start_time, 0); if (sendto(client_master_sd, &cmd, sizeof(cmd), 0, (struct sockaddr *)&dest_addr, sizeof(dest_addr)) != sizeof(cmd)) { printf("Client master: Can't send to client 1\n"); perror(NULL); exit(1); } wait_err = wait_for_msg(client_master_sd); if (wait_err) { printf("Client master: No result from client 1 (err=%u)\n", wait_err); exit(1); } sz = recv(client_master_sd, &ccmd, sizeof(ccmd), 0); elapsed = elapsedmillis(&start_time); if (sz != sizeof(ccmd)) { printf("Client master: invalid result from client 1\n"); perror(NULL); exit(1); } dprintf("Client master:rec cmd msg of size %i [%u:%llu:%llu:%u]\n", sz,ccmd.cmd,ccmd.msg_size,ccmd.msg_count,ccmd.client_no); dprintf("Client master: received TASK_FINISHED from client 1\n"); printf("... 
took %llu ms (round-trip avg/msg: %llu us)\n", elapsed, (elapsed * 1000)/cmd.msg_count); cmd.msg_size *= 4; cmd.msg_count /= 4; cmd.burst_size /= 4; } printf("Client master: Completed Latency Benchmark\n"); end_latency: /* Optionally run throughput test */ if (!t_mult) goto end_thruput; printf("Client master: Starting Throughput Benchmark\n"); /* Create remaining child clients */ while (num_clients < req_clients) { int sz; fflush(stdout); child_pid = fork(); if (child_pid < 0) { printf ("Client master: fork failed\n"); perror(NULL); exit(1); } num_clients++; if (!child_pid) { close(client_master_sd); clientmain(num_clients); /* Note: child client never returns */ } dprintf ("Client master: waiting for confirmation " "from client %llu\n", num_clients); wait_err = wait_for_msg(client_master_sd); if (wait_err) { printf("Client master: no confirmation from client %llu " "(err=%u)\n", num_clients, wait_err); exit(1); } sz = recv(client_master_sd, buf, 4, 0); if (sz != 4) { printf("Client master: confirmation failure " "from client_id %llu\n", num_clients); exit(1); } server_num = *(unsigned int*)buf; dprintf("Client master: client %llu linked to server %i\n", num_clients, server_num); } dprintf("Client master: all clients and servers started\n"); sleep(2); /* let console printfs flush before continuing */ /* Get child clients to run throughput test */ cmd.msg_size = 64; cmd.msg_count = 10240 * t_mult; cmd.burst_size = 10240/5; while (cmd.msg_size < TIPC_MAX_USER_MSG_SIZE) { struct timeval start_time; unsigned long long elapsed; unsigned long long msg_per_sec; unsigned long long procs; printf("Exchanging %llu*%llu messages of size %llu octets (burst size %llu)\n", num_clients, cmd.msg_count, cmd.msg_size, cmd.burst_size); gettimeofday(&start_time, 0); for (client_id = 1; client_id <= num_clients; client_id++) { cmd.client_no = client_id; dest_addr.addr.name.name.instance = client_id; if (sendto(client_master_sd, &cmd, sizeof(cmd), 0, (struct sockaddr *)&dest_addr, 
sizeof(dest_addr)) != sizeof(cmd)) { printf("Client master: can't send to client %u\n", client_id); perror(NULL); exit(1); } } for (client_id = 1; client_id <= num_clients; client_id++) { struct client_cmd report; int sz; wait_err = wait_for_msg(client_master_sd); if (wait_err) { printf("Client master: result %u not received " "(err=%u)\n", client_id, wait_err); exit(1); } sz = recv(client_master_sd, &report, sizeof(report), 0); if (sz != sizeof(report)) { printf("Client master: result %u invalid\n", client_id); perror(NULL); exit(1); } dprintf("Client master: received TASK_FINISHED " "from client %u\n", report.client_no); } elapsed = elapsedmillis(&start_time); msg_per_sec = (cmd.msg_count * num_clients * 1000)/elapsed; procs = 1 + (server_node != own_node(client_master_sd)); printf("... took %llu ms " "(avg %llu msg/s/dir, %llu bits/s/dir)\n", elapsed, msg_per_sec/2, msg_per_sec*cmd.msg_size*8/2); printf(" avg execution time (send+receive) %llu us/msg\n", (1000000 / (msg_per_sec * 2)) * procs); cmd.msg_size *= 4; cmd.msg_count /= 4; cmd.burst_size /= 4; } printf("Client master: Completed Throughput Benchmark\n"); end_thruput: /* Terminate all client processes */ cmd.cmd = TERMINATE; for (client_id = 1; client_id <= num_clients; client_id++) { dest_addr.addr.name.name.instance = client_id; if (sendto(client_master_sd, &cmd, sizeof(cmd), 0, (struct sockaddr *)&dest_addr, sizeof(dest_addr)) <= 0) { printf("Client master: failed to send TERMINATE message" " to client %u\n", client_id); perror(NULL); exit(1); } } if (signal(SIGALRM, sig_alarm) == SIG_ERR) { printf("Client master: Can't catch alarm signals\n"); perror(NULL); exit(1); } alarm(MAX_DELAY); for (client_id = 1; client_id <= num_clients; client_id++) { if (wait(NULL) <= 0) { printf("Client master: error during termination\n"); perror(NULL); exit(1); } } printf("****** TIPC benchmark client finished ******\n"); shutdown(client_master_sd, SHUT_RDWR); close(client_master_sd); exit(0); }