bool RunManagerYAMR::process_model_run(int sock_id, NetPackage &net_pack) { list<SlaveInfoRec>::iterator slave_info_iter = socket_to_iter_map.at(sock_id); bool use_run = false; int run_id = net_pack.get_run_id(); //check if another instance of this model run has already completed if (!run_finished(run_id)) { Parameters pars; Observations obs; Serialization::unserialize(net_pack.get_data(), pars, get_par_name_vec(), obs, get_obs_name_vec()); file_stor.update_run(run_id, pars, obs); slave_info_iter->set_state(SlaveInfoRec::State::COMPLETE); //slave_info_iter->set_state(SlaveInfoRec::State::WAITING); use_run = true; model_runs_done++; } // remove currently completed run from the active list auto it = get_active_run_iter(sock_id); unschedule_run(it); kill_runs(run_id, false, "completed on alternative node"); return use_run; }
bool RunManagerYAMR::process_model_run(int sock_id, NetPackage &net_pack) { bool use_run = false; int run_id =net_pack.get_run_id(); YamrModelRun model_run(run_id, sock_id); //check if another instance of this model run has already completed if (completed_runs.find(run_id) == completed_runs.end()) { completed_runs.insert(pair<int, YamrModelRun>(run_id, model_run)); Parameters pars; Observations obs; Serialization::unserialize(net_pack.get_data(), pars, get_par_name_vec(), obs, get_obs_name_vec()); file_stor.update_run(run_id, pars, obs); use_run = true; model_runs_done++; //beopest-style screen output for run counting //cout << setw(7) << model_runs_done; //if (model_runs_done % 9 == 0) cout << endl; } auto range_pair = active_runs.equal_range(run_id); //remaining runs with this id are not needed so mark them as zombies for ( auto b=range_pair.first; b!=range_pair.second; ++b) { if ( (*b).second.get_socket() != model_run.get_socket()) { zombie_runs.insert(*b); } } active_runs.erase(range_pair.first, range_pair.second); //kill all zombies for (auto it_zombie = zombie_runs.begin(); it_zombie != zombie_runs.end(); ++it_zombie) { if (it_zombie->second.get_id() == run_id) { int zombie_id = it_zombie->second.get_socket(); vector<string> sock_name = w_getnameinfo_vec(zombie_id); stringstream ss; ss << "killing zombie run " << run_id << " on slave : " << sock_name[0] << "$" << slave_info.get_work_dir(zombie_id); report(ss.str(), false); NetPackage net_pack(NetPackage::PackType::REQ_KILL, 0, 0, ""); char data = '\0'; int err = net_pack.send(zombie_id, &data, sizeof(data)); if (err <= 0) { report("error sending kill request to slave:" + sock_name[0] + "$" + slave_info.get_work_dir(zombie_id), true); } } } return use_run; }