Beispiel #1
0
// Try to send the client this result.
//
// Returns 0 (the value of add_result_to_reply()) on success.
// Otherwise returns a nonzero code:
// - ERR_DB_NOT_FOUND: the workunit lookup or the per-user count query failed
// - ERR_NO_APP_VERSION: the app is not selected by the user,
//   no app version is available, or the app record can't be found
// - ERR_WU_USER_RULE: this user already has a result for this WU
//   (only checked if config.one_result_per_user_per_wu)
// - whatever wu_is_infeasible_fast() returns if the host can't
//   handle the job (e.g. it needs more disk/mem/speed than the host has)
//
// Side effect: clears g_wreq->no_jobs_available.
//
static int possibly_send_result(SCHED_DB_RESULT& result) {
    DB_WORKUNIT wu;
    int retval;

    g_wreq->no_jobs_available = false;

    retval = wu.lookup_id(result.workunitid);
    if (retval) return ERR_DB_NOT_FOUND;

    // This doesn't take into account g_wreq->allow_non_selected_apps,
    // however Einstein@Home, which is the only project that currently uses
    // this locality scheduler, doesn't support the respective project-specific
    // preference setting
    //
    if (app_not_selected(wu.appid)) return ERR_NO_APP_VERSION;

    BEST_APP_VERSION* bavp = get_app_version(wu, true, false);
    if (!bavp) {
        // Tell anonymous-platform clients how to get work again,
        // and back them off.
        //
        if (!config.locality_scheduler_fraction
            && is_anonymous(g_request->platforms.list[0])
        ) {
            char help_msg_buf[512];
            // bounded: config.long_name is of unknown length here
            snprintf(help_msg_buf, sizeof(help_msg_buf),
                "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.",
                config.long_name
            );
            g_reply->insert_message(help_msg_buf, "notice");
            g_reply->set_delay(DELAY_ANONYMOUS);
        }
        return ERR_NO_APP_VERSION;
    }

    // lookup_app() can return NULL; don't dereference it blindly.
    //
    APP* app = ssp->lookup_app(wu.appid);
    if (!app) return ERR_NO_APP_VERSION;

    // NOTE(review): arguments are passed as (server state, priority,
    // report deadline), the order used by the other callers of
    // wu_is_infeasible_fast(); previously priority and deadline were swapped.
    // Confirm against the function's declaration.
    //
    retval = wu_is_infeasible_fast(
        wu, result.server_state, result.priority, result.report_deadline,
        *app, *bavp
    );
    if (retval) return retval;

    if (config.one_result_per_user_per_wu) {
        SCHED_DB_RESULT result2;
        long count;
        char buf[256];

        snprintf(buf, sizeof(buf),
            "where userid=%lu and workunitid=%lu", g_reply->user.id, wu.id
        );
        retval = result2.count(count, buf);
        if (retval) return ERR_DB_NOT_FOUND;
        if (count > 0) return ERR_WU_USER_RULE;
    }

    return add_result_to_reply(result, wu, bavp, true);
}
Beispiel #2
0
// Screen one cached job against the current request using only
// cheap checks (no DB access).
// Returns true if every check passes, false as soon as one fails.
//
// bavp is assigned the result of get_app_version() once the
// beta/reliable/infeasible/locality filters have passed.
// last_retval holds the previous infeasibility code; the detailed
// "WU is infeasible" message is logged only when the code changes.
//
static bool quick_check(
    WU_RESULT& wu_result,
    WORKUNIT& wu,       // a mutable copy of wu_result.workunit.
        // We may modify its delay_bound, rsc_fpops_est, and rsc_fpops_bound
    BEST_APP_VERSION* &bavp,
    APP* app,
    int& last_retval
) {
    const bool verbose = config.debug_send_job;

    // A beta scan only accepts beta apps;
    // a non-beta scan only accepts non-beta apps.
    //
    if (g_wreq->beta_only && !app->beta) {
        if (verbose) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] job is not from beta app; skipping\n"
            );
        }
        return false;
    }
    if (!g_wreq->beta_only && app->beta) {
        if (verbose) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] job is from beta app; skipping\n"
            );
        }
        return false;
    }
    if (g_wreq->beta_only && verbose) {
        log_messages.printf(MSG_NORMAL,
            "[send_job] [HOST#%lu] beta work found: [RESULT#%lu]\n",
            g_reply->host.id, wu_result.resultid
        );
    }

    // The job's need_reliable flag must match the kind of scan.
    // Beta apps don't use the reliable mechanism, so skip this for them.
    //
    if (!app->beta) {
        if (g_wreq->reliable_only) {
            if (!wu_result.need_reliable) {
                if (verbose) {
                    log_messages.printf(MSG_NORMAL,
                        "[send_job] job doesn't need reliable host; skipping\n"
                    );
                }
                return false;
            }
        } else if (wu_result.need_reliable) {
            if (verbose) {
                log_messages.printf(MSG_NORMAL,
                    "[send_job] job needs reliable host; skipping\n"
                );
            }
            return false;
        }
    }

    // When scanning for infeasible results,
    // reject jobs that have never been found infeasible.
    //
    if (g_wreq->infeasible_only && !wu_result.infeasible_count) {
        if (verbose) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] job is not infeasible; skipping\n"
            );
        }
        return false;
    }

    // Locality scheduling lite.
    // Non-LSL jobs are let through; otherwise we could starve them.
    // NOTE: THIS NEGATES THE OTHER SCHED POLICIES (reliable, etc.).
    // Need to think of some way of combining them.
    //
    // For an LSL app, when the request lists files,
    // skip the job if none of its files is on the host.
    // TODO: it should really be "host has sticky files for this app".
    // However, we don't have a way of making that association.
    // Could add something based on filename
    //
    if (g_wreq->locality_sched_lite
        && app->locality_scheduling == LOCALITY_SCHED_LITE
        && g_request->file_infos.size()
    ) {
        int files_present = nfiles_on_host(wu_result.workunit);
        if (config.debug_locality_lite) {
            log_messages.printf(MSG_NORMAL,
                "[loc_lite] job %s has %d files on this host\n",
                wu_result.workunit.name, files_present
            );
        }
        if (files_present == 0) {
            return false;
        }
    }

    // Pick the best app version for this host.
    //
    bavp = get_app_version(wu, true, g_wreq->reliable_only);
    if (!bavp) {
        if (verbose) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] No app version for job; skipping\n"
            );
        }
        return false;
    }

    // Apply the user's app selection, if it is in force.
    // This comes AFTER get_app_version(), otherwise we could send
    // a misleading message to user
    //
    const bool apply_app_filter =
        g_wreq->user_apps_only
        && (!g_wreq->beta_only || config.distinct_beta_apps);
    if (apply_app_filter && app_not_selected(app->id)) {
        g_wreq->no_allowed_apps_available = true;
        if (verbose) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] [USER#%lu] [WU#%lu] user doesn't want work for app %s\n",
                g_reply->user.id, wu.id, app->name
            );
        }
        return false;
    }

    // Final feasibility check.
    // This may modify wu.delay_bound and wu.rsc_fpops_est
    //
    int retval = wu_is_infeasible_fast(
        wu,
        wu_result.res_server_state, wu_result.res_priority,
        wu_result.res_report_deadline,
        *app, *bavp
    );
    if (!retval) {
        return true;
    }

    // Log the reason only when it differs from the previous one.
    //
    if (verbose && retval != last_retval) {
        log_messages.printf(MSG_NORMAL,
            "[send_job] [HOST#%lu] [WU#%lu %s] WU is infeasible: %s\n",
            g_reply->host.id, wu.id, wu.name, infeasible_string(retval)
        );
    }
    last_retval = retval;
    if (verbose) {
        log_messages.printf(MSG_NORMAL,
            "[send_job] is_infeasible_fast() failed; skipping\n"
        );
    }
    return false;
}
// Screen one job-cache entry against the current request using only
// cheap checks (no DB access).
// Returns true if every check passes, false as soon as one fails.
//
// Outputs: app is set from the workunit's appid;
// bavp is set to the result of get_app_version() once the earlier
// filters have passed.
// last_retval holds the previous infeasibility code; the detailed
// "WU is infeasible" message is logged only when the code changes.
// Side effect: clears g_wreq->no_jobs_available once the app is found.
//
static bool quick_check(
    WU_RESULT& wu_result, WORKUNIT& wu, BEST_APP_VERSION* &bavp,
    APP* &app, int& last_retval
) {
    // Only consider entries whose state is WR_STATE_PRESENT
    // or equal to g_pid.
    //
    const bool entry_usable =
        wu_result.state == WR_STATE_PRESENT || wu_result.state == g_pid;
    if (!entry_usable) {
        return false;
    }

    app = ssp->lookup_app(wu_result.workunit.appid);
    if (!app) {
        return false; // this should never happen
    }

    g_wreq->no_jobs_available = false;

    // A beta scan only accepts beta apps;
    // a non-beta scan only accepts non-beta apps.
    //
    if (g_wreq->beta_only && !app->beta) {
        return false;
    }
    if (!g_wreq->beta_only && app->beta) {
        return false;
    }
    if (g_wreq->beta_only && config.debug_send) {
        log_messages.printf(MSG_NORMAL,
            "[send] [HOST#%d] beta work found: [RESULT#%d]\n",
            g_reply->host.id, wu_result.resultid
        );
    }

    // The job's need_reliable flag must match the kind of scan.
    // Beta apps don't use the reliable mechanism, so skip this for them.
    //
    if (!app->beta) {
        if (g_wreq->reliable_only) {
            if (!wu_result.need_reliable) {
                return false;
            }
        } else if (wu_result.need_reliable) {
            return false;
        }
    }

    // When scanning for infeasible results,
    // reject jobs that have never been found infeasible.
    //
    if (g_wreq->infeasible_only && !wu_result.infeasible_count) {
        return false;
    }

    // Apply the user's app selection, if it is in force.
    //
    const bool apply_app_filter =
        g_wreq->user_apps_only
        && (!g_wreq->beta_only || config.distinct_beta_apps);
    if (apply_app_filter && app_not_selected(wu)) {
        g_wreq->no_allowed_apps_available = true;
#if 0
        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] [USER#%d] [WU#%d] user doesn't want work for app %s\n",
                g_reply->user.id, wu.id, app->name
            );
        }
#endif
        return false;
    }

    // Pick the best app version for this host.
    //
    bavp = get_app_version(wu, true, g_wreq->reliable_only);
    if (!bavp) {
        if (config.debug_array) {
            log_messages.printf(MSG_NORMAL,
                "[array] No app version\n"
            );
        }
        return false;
    }

    // Reject the job if the host can't handle it.
    //
    int retval = wu_is_infeasible_fast(
        wu,
        wu_result.res_server_state, wu_result.res_priority,
        wu_result.res_report_deadline,
        *app, *bavp
    );
    if (!retval) {
        return true;
    }

    // Log the reason only when it differs from the previous one.
    //
    if (config.debug_send && retval != last_retval) {
        log_messages.printf(MSG_NORMAL,
            "[send] [HOST#%d] [WU#%d %s] WU is infeasible: %s\n",
            g_reply->host.id, wu.id, wu.name, infeasible_string(retval)
        );
    }
    last_retval = retval;
    if (config.debug_array) {
        log_messages.printf(MSG_NORMAL, "[array] infeasible\n");
    }
    return false;
}
Beispiel #4
0
// Send a job for the given assignment:
// look up the assigned workunit, create a new result for it,
// and add that result to the reply.
//
// Returns 0 on success, nonzero on failure:
// -1 if the upload private key can't be read or the WU's app
// is not selected by the user, ERR_NOT_FOUND if no app version
// is available, otherwise the error code of the failing call.
//
// The upload private key is read from config.key_dir on the first
// successful call and cached in a static for later calls.
//
static int send_assigned_job(ASSIGNMENT& asg) {
    int retval;
    DB_WORKUNIT wu;
    char suffix[256], path[MAXPATHLEN];
    const char *rtfpath;
    static bool key_loaded=false;
    static int seqno=0;
    static R_RSA_PRIVATE_KEY key;
    BEST_APP_VERSION* bavp;

    // Mark the key as loaded only after it was actually read;
    // otherwise a failed read would never be retried and later calls
    // would sign results with an unloaded key.
    //
    if (!key_loaded) {
        snprintf(path, sizeof(path), "%s/upload_private", config.key_dir);
        retval = read_key_file(path, key);
        if (retval) {
            log_messages.printf(MSG_CRITICAL, "can't read key\n");
            return -1;
        }
        key_loaded = true;
    }

    retval = wu.lookup_id(asg.workunitid);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "assigned WU %lu not found\n", asg.workunitid
        );
        return retval;
    }

    if (app_not_selected(wu.appid)) {
        log_messages.printf(MSG_CRITICAL,
            "Assigned WU %s is for app not selected by user\n", wu.name
        );
        return -1;
    }

    bavp = get_app_version(wu, false, false);
    if (!bavp) {
        log_messages.printf(MSG_CRITICAL,
            "App version for assigned WU not found\n"
        );
        return ERR_NOT_FOUND;
    }

    // Result name suffix: pid, current time and a per-process sequence
    // number.
    //
    rtfpath = config.project_path("%s", wu.result_template_file);
    snprintf(suffix, sizeof(suffix),
        "%d_%d_%d", (int)getpid(), (int)time(0), seqno++
    );
    retval = create_result(
        wu, const_cast<char*>(rtfpath), suffix, key, config, 0, 0
    );
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "[WU#%lu %s] create_result(): %s\n", wu.id, wu.name, boincerror(retval)
        );
        return retval;
    }

    // Read back the result just created; don't send it if that fails.
    //
    DB_ID_TYPE result_id = boinc_db.insert_id();
    SCHED_DB_RESULT result;
    retval = result.lookup_id(result_id);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "[WU#%lu %s] can't look up new result %lu: %s\n",
            wu.id, wu.name, result_id, boincerror(retval)
        );
        return retval;
    }

    retval = add_result_to_reply(result, wu, bavp, false);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "[WU#%lu %s] add_result_to_reply(): %s\n",
            wu.id, wu.name, boincerror(retval)
        );
        return retval;
    }

    if (config.debug_assignment) {
        log_messages.printf(MSG_NORMAL,
            "[assign] [WU#%lu] [RESULT#%lu] [HOST#%lu] send assignment %lu\n",
            wu.id, result_id, g_reply->host.id, asg.id
        );
    }
    return 0;
}
Beispiel #5
0
// Compute this job's score: the value of sending it to this host.
// Also screens the job; returns false if it can't be sent to the host,
// true otherwise (with the score stored in `score`).
//
// Uses app, bavp and score, which are not declared in this function
// (presumably JOB members).
//
// Score contributions:
//   beta app, beta work allowed:            +1
//   app not selected, non-preferred allowed: -1
//   job has a nonzero infeasible_count:     +1
//   LSL app and job has files on the host:  +10
//   size class matches host:                +5
//   job size class above host's target:     -2
//   job size class below host's target:     -1
//
bool JOB::get_score(WU_RESULT& wu_result) {
    score = 0;

    const bool beta_app = app->beta ? true : false;

    // A non-beta job that needs a reliable host
    // requires a reliable app version.
    //
    if (!beta_app && wu_result.need_reliable && !bavp->reliable) {
        return false;
    }

    if (beta_app) {
        if (!g_wreq->allow_beta_work) {
            return false;
        }
        score += 1;
    }

    if (app_not_selected(wu_result.workunit)) {
        if (!g_wreq->allow_non_preferred_apps) {
            return false;
        }
        score -= 1;
    }

    if (wu_result.infeasible_count) {
        score += 1;
    }

    // Locality scheduling lite: favor jobs whose files
    // are already on the host.
    //
    const bool check_locality =
        app->locality_scheduling == LOCALITY_SCHED_LITE
        && g_request->file_infos.size();
    if (check_locality) {
        int files_present = nfiles_on_host(wu_result.workunit);
        if (config.debug_locality_lite) {
            log_messages.printf(MSG_NORMAL,
                "[loc_lite] job %s has %d files on this host\n",
                wu_result.workunit.name, files_present
            );
        }
        if (files_present > 0) {
            score += 10;
        }
    }

    // Size classes: compare the job's class with the one
    // get_size_class() returns for this host's effective speed.
    //
    if (app->n_size_classes > 1) {
        double effective_speed = bavp->host_usage.projected_flops * g_reply->host.on_frac * g_reply->host.active_frac;
        int target_size = get_size_class(*app, effective_speed);
        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] size: host %d job %d speed %f\n",
                target_size, wu_result.workunit.size_class, effective_speed
            );
        }
        if (target_size > wu_result.workunit.size_class) {
            score -= 1;
        } else if (target_size < wu_result.workunit.size_class) {
            score -= 2;
        } else {
            score += 5;
        }
    }

    if (config.debug_send) {
        log_messages.printf(MSG_NORMAL,
            "[send]: job score %f\n", score
        );
    }
    return true;
}
Beispiel #6
0
// Return true if a result name can be placed inside a single-quoted
// SQL string literal as is, i.e. it contains no quote or backslash.
//
static bool nci_result_name_is_sql_safe(const char* name) {
    if (!name) return false;
    for (const char* p = name; *p; p++) {
        if (*p == '\'' || *p == '\\') return false;
    }
    return true;
}

// Try to send jobs for non-CPU-intensive (NCI) apps
// for which the host doesn't have a job in progress.
//
// Steps:
// 1) collect the NCI apps from shared memory (ssp->apps)
// 2) look up each in-progress result reported by the client
//    and flag its app as already having a job
// 3) for each remaining NCI app allowed by the user's beta and
//    app-selection preferences, call send_job_for_app()
//
// Always returns 0; per-app failures are only logged.
//
int send_nci() {
    int retval;
    vector<APP> nci_apps;
    char buf[1024];

    if (config.debug_send) {
        log_messages.printf(MSG_NORMAL, "checking for NCI jobs\n");
    }

    // make a vector of NCI apps
    //
    for (int i=0; i<ssp->napps; i++) {
        if (!ssp->apps[i].non_cpu_intensive) continue;
        APP app = ssp->apps[i];
        app.have_job = false;
        nci_apps.push_back(app);
    }

    // nothing to send; skip the per-result DB lookups below
    //
    if (nci_apps.empty()) {
        return 0;
    }

    // scan through the list of in-progress jobs,
    // flagging the associated apps as having jobs
    //
    for (unsigned int i=0; i<g_request->other_results.size(); i++) {
        DB_RESULT r;
        OTHER_RESULT &ores = g_request->other_results[i];

        // The name comes from the client request and is spliced into
        // a SQL where-clause, so refuse anything that could terminate
        // or escape the string literal.
        //
        if (!nci_result_name_is_sql_safe(ores.name)) {
            log_messages.printf(MSG_NORMAL,
                "Skipping result with invalid name: %s\n", ores.name
            );
            continue;
        }
        int len = snprintf(buf, sizeof(buf), "where name='%s'", ores.name);
        if (len < 0 || len >= (int)sizeof(buf)) {
            // clause was truncated; it can't match any result
            log_messages.printf(MSG_NORMAL, "No such result: %s\n", ores.name);
            continue;
        }
        retval = r.lookup(buf);
        if (retval) {
            log_messages.printf(MSG_NORMAL, "No such result: %s\n", ores.name);
            continue;
        }
        for (unsigned int j=0; j<nci_apps.size(); j++) {
            APP& app = nci_apps[j];
            if (app.id == r.appid) {
                app.have_job = true;
                break;
            }
        }
    }

    // For each NCI app w/o a job, try to send one
    //
    for (unsigned int i=0; i<nci_apps.size(); i++) {
        APP& app = nci_apps[i];
        if (app.have_job) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "Already have job for %s\n", app.name
                );
            }
            continue;
        }
        if (app.beta && !g_wreq->project_prefs.allow_beta_work) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL, "%s is beta\n", app.name);
            }
            continue;
        }
        if (app_not_selected(app.id)
            && !g_wreq->project_prefs.allow_non_preferred_apps
        ) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "%s is not selected\n", app.name
                );
            }
            continue;
        }
        retval = send_job_for_app(app);
        if (retval) {
            log_messages.printf(MSG_NORMAL,
                "failed to send job for NCI app %s\n", app.user_friendly_name
            );
        }
    }
    return 0;
}