// Sends a message over this slave's socket (sockfd) using net_pack.send().
//
// The send is retried while net_pack.send() returns -1, but only up to
// max_send_attempts times. Without that cap a permanently failing send
// would spin here forever and the attempt counter would eventually
// overflow.
//
// Parameters:
//   net_pack - package object whose send() is invoked
//   data     - pointer to the payload handed to net_pack.send()
//   data_len - payload length handed to net_pack.send()
//
// Returns the value of the last net_pack.send() call; this is -1 when every
// attempt failed.
int YAMRSlave::send_message(NetPackage &net_pack, const void *data, unsigned long data_len)
{
	// Upper bound on consecutive failed sends before giving up and
	// reporting the failure to the caller.
	const int max_send_attempts = 100;

	int err = -1;
	for (int n = 0; err == -1 && n < max_send_attempts; ++n)
	{
		err = net_pack.send(sockfd, data, data_len);
	}
	return err;
}
bool RunManagerYAMR::process_model_run(int sock_id, NetPackage &net_pack) { bool use_run = false; int run_id =net_pack.get_run_id(); YamrModelRun model_run(run_id, sock_id); //check if another instance of this model run has already completed if (completed_runs.find(run_id) == completed_runs.end()) { completed_runs.insert(pair<int, YamrModelRun>(run_id, model_run)); Parameters pars; Observations obs; Serialization::unserialize(net_pack.get_data(), pars, get_par_name_vec(), obs, get_obs_name_vec()); file_stor.update_run(run_id, pars, obs); use_run = true; model_runs_done++; //beopest-style screen output for run counting //cout << setw(7) << model_runs_done; //if (model_runs_done % 9 == 0) cout << endl; } auto range_pair = active_runs.equal_range(run_id); //remaining runs with this id are not needed so mark them as zombies for ( auto b=range_pair.first; b!=range_pair.second; ++b) { if ( (*b).second.get_socket() != model_run.get_socket()) { zombie_runs.insert(*b); } } active_runs.erase(range_pair.first, range_pair.second); //kill all zombies for (auto it_zombie = zombie_runs.begin(); it_zombie != zombie_runs.end(); ++it_zombie) { if (it_zombie->second.get_id() == run_id) { int zombie_id = it_zombie->second.get_socket(); vector<string> sock_name = w_getnameinfo_vec(zombie_id); stringstream ss; ss << "killing zombie run " << run_id << " on slave : " << sock_name[0] << "$" << slave_info.get_work_dir(zombie_id); report(ss.str(), false); NetPackage net_pack(NetPackage::PackType::REQ_KILL, 0, 0, ""); char data = '\0'; int err = net_pack.send(zombie_id, &data, sizeof(data)); if (err <= 0) { report("error sending kill request to slave:" + sock_name[0] + "$" + slave_info.get_work_dir(zombie_id), true); } } } return use_run; }