bool RunManagerYAMR::process_model_run(int sock_id, NetPackage &net_pack) { list<SlaveInfoRec>::iterator slave_info_iter = socket_to_iter_map.at(sock_id); bool use_run = false; int run_id = net_pack.get_run_id(); //check if another instance of this model run has already completed if (!run_finished(run_id)) { Parameters pars; Observations obs; Serialization::unserialize(net_pack.get_data(), pars, get_par_name_vec(), obs, get_obs_name_vec()); file_stor.update_run(run_id, pars, obs); slave_info_iter->set_state(SlaveInfoRec::State::COMPLETE); //slave_info_iter->set_state(SlaveInfoRec::State::WAITING); use_run = true; model_runs_done++; } // remove currently completed run from the active list auto it = get_active_run_iter(sock_id); unschedule_run(it); kill_runs(run_id, false, "completed on alternative node"); return use_run; }
// Decode the object carried by `package`, forward it to the observer as a
// NetEvent for `client`, and advance the client's package counter.
//
// client  - connection peer; its `index` is passed to the observer and its
//           `nextPackageNumber` is incremented.
// package - source of the serialized object (getData()/getDataSize()).
//
// If the stream does not produce an object, no event is dispatched (the
// original code dereferenced a null pointer in that case). The package counter
// is still advanced, exactly as before.
void NetConnection::processPackage(NetAddress& client, const NetPackage& package)
{
    NetObject* pobject = nullptr;
    ArrayStream arraystream(package.getData(), package.getDataSize());
    NetObjectStream stream(arraystream);
    stream >> &pobject;

    // Take ownership right away so the object is released on every exit path.
    // NOTE(review): the static_cast assumes every streamed object is a NetEvent - confirm.
    std::unique_ptr<NetEvent> event(static_cast<NetEvent*>(pobject));
    if (event)
    {
        mObserver.onEvent(client.index, *event);
    }

    client.nextPackageNumber++;
}
bool RunManagerYAMR::process_model_run(int sock_id, NetPackage &net_pack) { bool use_run = false; int run_id =net_pack.get_run_id(); YamrModelRun model_run(run_id, sock_id); //check if another instance of this model run has already completed if (completed_runs.find(run_id) == completed_runs.end()) { completed_runs.insert(pair<int, YamrModelRun>(run_id, model_run)); Parameters pars; Observations obs; Serialization::unserialize(net_pack.get_data(), pars, get_par_name_vec(), obs, get_obs_name_vec()); file_stor.update_run(run_id, pars, obs); use_run = true; model_runs_done++; //beopest-style screen output for run counting //cout << setw(7) << model_runs_done; //if (model_runs_done % 9 == 0) cout << endl; } auto range_pair = active_runs.equal_range(run_id); //remaining runs with this id are not needed so mark them as zombies for ( auto b=range_pair.first; b!=range_pair.second; ++b) { if ( (*b).second.get_socket() != model_run.get_socket()) { zombie_runs.insert(*b); } } active_runs.erase(range_pair.first, range_pair.second); //kill all zombies for (auto it_zombie = zombie_runs.begin(); it_zombie != zombie_runs.end(); ++it_zombie) { if (it_zombie->second.get_id() == run_id) { int zombie_id = it_zombie->second.get_socket(); vector<string> sock_name = w_getnameinfo_vec(zombie_id); stringstream ss; ss << "killing zombie run " << run_id << " on slave : " << sock_name[0] << "$" << slave_info.get_work_dir(zombie_id); report(ss.str(), false); NetPackage net_pack(NetPackage::PackType::REQ_KILL, 0, 0, ""); char data = '\0'; int err = net_pack.send(zombie_id, &data, sizeof(data)); if (err <= 0) { report("error sending kill request to slave:" + sock_name[0] + "$" + slave_info.get_work_dir(zombie_id), true); } } } return use_run; }
// Block until one of the descriptors in `master` is readable, then receive a
// single package from the first ready descriptor into `net_pack`.
//
// Returns the result of net_pack.recv(): a positive value when a package was
// received, 0 when the connection was lost (the descriptor is closed and
// removed from `master`), or a negative value when the receive failed.
// The process exits with status 4 if w_select() reports an error.
int YAMRSlave::recv_message(NetPackage &net_pack)
{
	int err = -1;
	while (true)
	{
		// select() modifies the set it is given, so work on a copy of master.
		fd_set ready_fds = master;
		if (w_select(fdmax+1, &ready_fds, NULL, NULL, NULL) == -1)
		{
			exit(4);
		}

		for (int fd = 0; fd <= fdmax; ++fd)
		{
			if (!FD_ISSET(fd, &ready_fds))
			{
				continue;
			}

			// Data is waiting on this descriptor: read it.
			err = net_pack.recv(fd);
			if (err <= 0)
			{
				// Receive error or lost connection.
				vector<string> sock_name = w_getnameinfo_vec(fd);
				if (err < 0)
				{
					cerr << "receive from master failed: " << sock_name[0] << ":" << sock_name[1] << endl;
				}
				else
				{
					cerr << "lost connection to master: " << sock_name[0] << ":" << sock_name[1] << endl;
					w_close(fd);
					FD_CLR(fd, &master); // stop watching the closed descriptor
				}
			}
			// On success the data is stored in net_pack; the caller processes it.
			// Either way, only the first ready descriptor is serviced per call.
			return err;
		}
	}
	return err;
}
// Send `data_len` bytes starting at `data` through `net_pack` on this slave's
// socket (sockfd), retrying for as long as net_pack.send() returns -1.
//
// Returns the first result of net_pack.send() that is not -1.
//
// NOTE: the retry is unbounded, as in the original; if send() keeps returning
// -1 this function never returns. The unused attempt counter was removed: it
// was never read, and incrementing it in an endless loop would eventually
// overflow a signed int.
int YAMRSlave::send_message(NetPackage &net_pack, const void *data, unsigned long data_len)
{
	int err;
	do
	{
		err = net_pack.send(sockfd, data, data_len);
	} while (err == -1);
	return err;
}