// Try to send the client this result // This can fail because: // - result needs more disk/mem/speed than host has // - already sent a result for this WU // - no app_version available // static int possibly_send_result(SCHED_DB_RESULT& result) { DB_WORKUNIT wu; SCHED_DB_RESULT result2; int retval; long count; char buf[256]; BEST_APP_VERSION* bavp; g_wreq->no_jobs_available = false; retval = wu.lookup_id(result.workunitid); if (retval) return ERR_DB_NOT_FOUND; // This doesn't take into account g_wreq->allow_non_selected_apps, // however Einstein@Home, which is the only project that currently uses // this locality scheduler, doesn't support the respective project-specific // preference setting // if (app_not_selected(wu.appid)) return ERR_NO_APP_VERSION; bavp = get_app_version(wu, true, false); if (!config.locality_scheduler_fraction && !bavp && is_anonymous(g_request->platforms.list[0])) { char help_msg_buf[512]; sprintf(help_msg_buf, "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.", config.long_name ); g_reply->insert_message(help_msg_buf, "notice"); g_reply->set_delay(DELAY_ANONYMOUS); } if (!bavp) return ERR_NO_APP_VERSION; APP* app = ssp->lookup_app(wu.appid); retval = wu_is_infeasible_fast( wu, result.server_state, result.report_deadline, result.priority, *app, *bavp ); if (retval) return retval; if (config.one_result_per_user_per_wu) { sprintf(buf, "where userid=%lu and workunitid=%lu", g_reply->user.id, wu.id); retval = result2.count(count, buf); if (retval) return ERR_DB_NOT_FOUND; if (count > 0) return ERR_WU_USER_RULE; } return add_result_to_reply(result, wu, bavp, true); }
// do fast checks on this job, i.e. ones that don't require DB access // if any check fails, return false // static bool quick_check( WU_RESULT& wu_result, WORKUNIT& wu, // a mutable copy of wu_result.workunit. // We may modify its delay_bound, rsc_fpops_est, and rsc_fpops_bound BEST_APP_VERSION* &bavp, APP* app, int& last_retval ) { int retval; // If we're looking for beta jobs and this isn't one, skip it // if (g_wreq->beta_only) { if (!app->beta) { if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] job is not from beta app; skipping\n" ); } return false; } if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] [HOST#%lu] beta work found: [RESULT#%lu]\n", g_reply->host.id, wu_result.resultid ); } } else { if (app->beta) { if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] job is from beta app; skipping\n" ); } return false; } } // Are we scanning for need_reliable results? // skip this check the app is beta // (beta apps don't use the reliable mechanism) // if (!app->beta) { if (g_wreq->reliable_only && (!wu_result.need_reliable)) { if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] job doesn't need reliable host; skipping\n" ); } return false; } else if (!g_wreq->reliable_only && wu_result.need_reliable) { if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] job needs reliable host; skipping\n" ); } return false; } } // don't send if we are looking for infeasible results // and the result is not infeasible // if (g_wreq->infeasible_only && (wu_result.infeasible_count==0)) { if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] job is not infeasible; skipping\n" ); } return false; } // locality sched lite check. // Allow non-LSL jobs; otherwise we could starve them // NOTE: THIS NEGATES THE OTHER SCHED POLICIES (reliable, etc.). // Need to think of some way of combining them. // if (g_wreq->locality_sched_lite) { // skip this job if host has sticky files // but none of them is used by this job. // TODO: it should really be "host has sticky files for this app". // However, we don't have a way of making that association. // Could add something based on filename // if (app->locality_scheduling == LOCALITY_SCHED_LITE && g_request->file_infos.size() ) { int n = nfiles_on_host(wu_result.workunit); if (config.debug_locality_lite) { log_messages.printf(MSG_NORMAL, "[loc_lite] job %s has %d files on this host\n", wu_result.workunit.name, n ); } if (n == 0) { return false; } } } // Find the best app_version for this host. // bavp = get_app_version(wu, true, g_wreq->reliable_only); if (!bavp) { if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] No app version for job; skipping\n" ); } return false; } // Check app filter if needed. // Do this AFTER get_app_version(), otherwise we could send // a misleading message to user // if (g_wreq->user_apps_only && (!g_wreq->beta_only || config.distinct_beta_apps) ) { if (app_not_selected(app->id)) { g_wreq->no_allowed_apps_available = true; if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] [USER#%lu] [WU#%lu] user doesn't want work for app %s\n", g_reply->user.id, wu.id, app->name ); } return false; } } // Check whether we can send this job. // This may modify wu.delay_bound and wu.rsc_fpops_est // retval = wu_is_infeasible_fast( wu, wu_result.res_server_state, wu_result.res_priority, wu_result.res_report_deadline, *app, *bavp ); if (retval) { if (retval != last_retval && config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] [HOST#%lu] [WU#%lu %s] WU is infeasible: %s\n", g_reply->host.id, wu.id, wu.name, infeasible_string(retval) ); } last_retval = retval; if (config.debug_send_job) { log_messages.printf(MSG_NORMAL, "[send_job] is_infeasible_fast() failed; skipping\n" ); } return false; } return true; }
// do fast checks on this job, i.e. ones that don't require DB access // if any check fails, return false // static bool quick_check( WU_RESULT& wu_result, WORKUNIT& wu, BEST_APP_VERSION* &bavp, APP* &app, int& last_retval ) { int retval; if (wu_result.state != WR_STATE_PRESENT && wu_result.state != g_pid) { return false; } app = ssp->lookup_app(wu_result.workunit.appid); if (app == NULL) { return false; // this should never happen } g_wreq->no_jobs_available = false; // If we're looking for beta jobs and this isn't one, skip it // if (g_wreq->beta_only) { if (!app->beta) { return false; } if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] [HOST#%d] beta work found: [RESULT#%d]\n", g_reply->host.id, wu_result.resultid ); } } else { if (app->beta) { return false; } } // If this is a reliable host and we are checking for results that // need a reliable host, then continue if the result is a normal result // skip if the app is beta (beta apps don't use the reliable mechanism) // if (!app->beta) { if (g_wreq->reliable_only && (!wu_result.need_reliable)) { return false; } else if (!g_wreq->reliable_only && wu_result.need_reliable) { return false; } } // don't send if we are looking for infeasible results // and the result is not infeasible // if (g_wreq->infeasible_only && (wu_result.infeasible_count==0)) { return false; } // check app filter if needed // if (g_wreq->user_apps_only && (!g_wreq->beta_only || config.distinct_beta_apps) ) { if (app_not_selected(wu)) { g_wreq->no_allowed_apps_available = true; #if 0 if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] [USER#%d] [WU#%d] user doesn't want work for app %s\n", g_reply->user.id, wu.id, app->name ); } #endif return false; } } // Find the app and best app_version for this host. // bavp = get_app_version(wu, true, g_wreq->reliable_only); if (!bavp) { if (config.debug_array) { log_messages.printf(MSG_NORMAL, "[array] No app version\n" ); } return false; } // don't send job if host can't handle it // retval = wu_is_infeasible_fast( wu, wu_result.res_server_state, wu_result.res_priority, wu_result.res_report_deadline, *app, *bavp ); if (retval) { if (retval != last_retval && config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] [HOST#%d] [WU#%d %s] WU is infeasible: %s\n", g_reply->host.id, wu.id, wu.name, infeasible_string(retval) ); } last_retval = retval; if (config.debug_array) { log_messages.printf(MSG_NORMAL, "[array] infeasible\n"); } return false; } return true; }
// send a job for the given assignment // static int send_assigned_job(ASSIGNMENT& asg) { int retval; DB_WORKUNIT wu; char suffix[256], path[MAXPATHLEN]; const char *rtfpath; static bool first=true; static int seqno=0; static R_RSA_PRIVATE_KEY key; BEST_APP_VERSION* bavp; if (first) { first = false; sprintf(path, "%s/upload_private", config.key_dir); retval = read_key_file(path, key); if (retval) { log_messages.printf(MSG_CRITICAL, "can't read key\n"); return -1; } } retval = wu.lookup_id(asg.workunitid); if (retval) { log_messages.printf(MSG_CRITICAL, "assigned WU %lu not found\n", asg.workunitid ); return retval; } if (app_not_selected(wu.appid)) { log_messages.printf(MSG_CRITICAL, "Assigned WU %s is for app not selected by user\n", wu.name ); return -1; } bavp = get_app_version(wu, false, false); if (!bavp) { log_messages.printf(MSG_CRITICAL, "App version for assigned WU not found\n" ); return ERR_NOT_FOUND; } rtfpath = config.project_path("%s", wu.result_template_file); sprintf(suffix, "%d_%d_%d", getpid(), (int)time(0), seqno++); retval = create_result( wu, const_cast<char*>(rtfpath), suffix, key, config, 0, 0 ); if (retval) { log_messages.printf(MSG_CRITICAL, "[WU#%lu %s] create_result(): %s\n", wu.id, wu.name, boincerror(retval) ); return retval; } DB_ID_TYPE result_id = boinc_db.insert_id(); SCHED_DB_RESULT result; retval = result.lookup_id(result_id); add_result_to_reply(result, wu, bavp, false); if (config.debug_assignment) { log_messages.printf(MSG_NORMAL, "[assign] [WU#%lu] [RESULT#%lu] [HOST#%lu] send assignment %lu\n", wu.id, result_id, g_reply->host.id, asg.id ); } return 0; }
// Assign a score to this job, // representing the value of sending the job to this host. // Also do some initial screening, // and return false if can't send the job to host // bool JOB::get_score(WU_RESULT& wu_result) { score = 0; if (!app->beta && wu_result.need_reliable) { if (!bavp->reliable) { return false; } } if (app->beta) { if (g_wreq->allow_beta_work) { score += 1; } else { return false; } } if (app_not_selected(wu_result.workunit)) { if (g_wreq->allow_non_preferred_apps) { score -= 1; } else { return false; } } if (wu_result.infeasible_count) { score += 1; } if (app->locality_scheduling == LOCALITY_SCHED_LITE && g_request->file_infos.size() ) { int n = nfiles_on_host(wu_result.workunit); if (config.debug_locality_lite) { log_messages.printf(MSG_NORMAL, "[loc_lite] job %s has %d files on this host\n", wu_result.workunit.name, n ); } if (n > 0) { score += 10; } } if (app->n_size_classes > 1) { double effective_speed = bavp->host_usage.projected_flops * g_reply->host.on_frac * g_reply->host.active_frac; int target_size = get_size_class(*app, effective_speed); if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] size: host %d job %d speed %f\n", target_size, wu_result.workunit.size_class, effective_speed ); } if (target_size == wu_result.workunit.size_class) { score += 5; } else if (target_size < wu_result.workunit.size_class) { score -= 2; } else { score -= 1; } } if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send]: job score %f\n", score ); } return true; }
// try to send jobs for non-CPU-intensive (NCI) apps // for which the host doesn't have a job in progress // int send_nci() { int retval; vector<APP> nci_apps; char buf[1024]; if (config.debug_send) { log_messages.printf(MSG_NORMAL, "checking for NCI jobs\n"); } // make a vector of NCI apps // for (int i=0; i<ssp->napps; i++) { if (!ssp->apps[i].non_cpu_intensive) continue; APP app = ssp->apps[i]; app.have_job = false; nci_apps.push_back(app); } // scan through the list of in-progress jobs, // flagging the associated apps as having jobs // for (unsigned int i=0; i<g_request->other_results.size(); i++) { DB_RESULT r; OTHER_RESULT &ores = g_request->other_results[i]; sprintf(buf, "where name='%s'", ores.name); retval = r.lookup(buf); if (retval) { log_messages.printf(MSG_NORMAL, "No such result: %s\n", ores.name); continue; } for (unsigned int j=0; j<nci_apps.size(); j++) { APP& app = nci_apps[j]; if (app.id == r.appid) { app.have_job = true; break; } } } // For each NCI app w/o a job, try to send one // for (unsigned int i=0; i<nci_apps.size(); i++) { APP& app = nci_apps[i]; if (app.have_job) { if (config.debug_send) { log_messages.printf(MSG_NORMAL, "Already have job for %s\n", app.name ); } continue; } if (app.beta && !g_wreq->project_prefs.allow_beta_work) { if (config.debug_send) { log_messages.printf(MSG_NORMAL, "%s is beta\n", app.name); } continue; } if (app_not_selected(app.id)) { if (!g_wreq->project_prefs.allow_non_preferred_apps) { if (config.debug_send) { log_messages.printf(MSG_NORMAL, "%s is not selected\n", app.name ); } continue; } } retval = send_job_for_app(app); if (retval) { log_messages.printf(MSG_NORMAL, "failed to send job for NCI app %s\n", app.user_friendly_name ); } } return 0; }