Example 1
// do fast checks on this job, i.e. ones that don't require DB access
// if any check fails, return false
//
static bool quick_check(
    WU_RESULT& wu_result,
    WORKUNIT& wu,       // a mutable copy of wu_result.workunit.
        // We may modify its delay_bound, rsc_fpops_est, and rsc_fpops_bound
    BEST_APP_VERSION* &bavp,
    APP* app,
    int& last_retval
) {
    int retval;

    // If we're looking for beta jobs and this isn't one, skip it
    //
    if (g_wreq->beta_only) {
        if (!app->beta) {
            if (config.debug_send_job) {
                log_messages.printf(MSG_NORMAL,
                    "[send_job] job is not from beta app; skipping\n"
                );
            }
            return false;
        }
        if (config.debug_send_job) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] [HOST#%lu] beta work found: [RESULT#%lu]\n",
                g_reply->host.id, wu_result.resultid
            );
        }
    } else {
        if (app->beta) {
            if (config.debug_send_job) {
                log_messages.printf(MSG_NORMAL,
                    "[send_job] job is from beta app; skipping\n"
                );
            }
            return false;
        }
    }

    // Are we scanning for need_reliable results?
    // skip this check if the app is beta
    // (beta apps don't use the reliable mechanism)
    //
    if (!app->beta) {
        if (g_wreq->reliable_only && (!wu_result.need_reliable)) {
            if (config.debug_send_job) {
                log_messages.printf(MSG_NORMAL,
                    "[send_job] job doesn't need reliable host; skipping\n"
                );
            }
            return false;
        } else if (!g_wreq->reliable_only && wu_result.need_reliable) {
            if (config.debug_send_job) {
                log_messages.printf(MSG_NORMAL,
                    "[send_job] job needs reliable host; skipping\n"
                );
            }
            return false;
        }
    }

    // don't send if we are looking for infeasible results
    // and the result is not infeasible
    //
    if (g_wreq->infeasible_only && (wu_result.infeasible_count==0)) {
        if (config.debug_send_job) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] job is not infeasible; skipping\n"
            );
        }
        return false;
    }

    // locality sched lite check.
    // Allow non-LSL jobs; otherwise we could starve them
    // NOTE: THIS NEGATES THE OTHER SCHED POLICIES (reliable, etc.).
    // Need to think of some way of combining them.
    //
    if (g_wreq->locality_sched_lite) {
        // skip this job if host has sticky files
        // but none of them is used by this job.
        // TODO: it should really be "host has sticky files for this app".
        // However, we don't have a way of making that association.
        // Could add something based on filename
        //
        if (app->locality_scheduling == LOCALITY_SCHED_LITE
            && g_request->file_infos.size()
        ) {
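            // count how many of this job's input files are already
            // among the sticky files on this host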
            int n = nfiles_on_host(wu_result.workunit);
            if (config.debug_locality_lite) {
                log_messages.printf(MSG_NORMAL,
                    "[loc_lite] job %s has %d files on this host\n",
                    wu_result.workunit.name, n
                );
            }
            if (n == 0) {
                return false;
            }
        }
    }

    // Find the best app_version for this host.
    //
    bavp = get_app_version(wu, true, g_wreq->reliable_only);
    if (!bavp) {
        if (config.debug_send_job) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] No app version for job; skipping\n"
            );
        }
        return false;
    }

    // Check app filter if needed.
    // Do this AFTER get_app_version(); otherwise we could send
    // a misleading message to the user
    //
    if (g_wreq->user_apps_only &&
        (!g_wreq->beta_only || config.distinct_beta_apps)
    ) {
        if (app_not_selected(app->id)) {
            g_wreq->no_allowed_apps_available = true;
            if (config.debug_send_job) {
                log_messages.printf(MSG_NORMAL,
                    "[send_job] [USER#%lu] [WU#%lu] user doesn't want work for app %s\n",
                    g_reply->user.id, wu.id, app->name
                );
            }
            return false;
        }
    }

    // Check whether we can send this job.
    // This may modify wu.delay_bound and wu.rsc_fpops_est
    //
    retval = wu_is_infeasible_fast(
        wu,
        wu_result.res_server_state, wu_result.res_priority,
        wu_result.res_report_deadline,
        *app, *bavp
    );
    if (retval) {
        if (retval != last_retval && config.debug_send_job) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] [HOST#%lu] [WU#%lu %s] WU is infeasible: %s\n",
                g_reply->host.id, wu.id, wu.name, infeasible_string(retval)
            );
        }
        last_retval = retval;
        if (config.debug_send_job) {
            log_messages.printf(MSG_NORMAL,
                "[send_job] is_infeasible_fast() failed; skipping\n"
            );
        }
        return false;
    }
    return true;
}
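
For context, quick_check() is meant to be called while scanning the shared-memory job array. The loop below is a hypothetical sketch of such a driver, not the actual scheduler code; ssp (the SCHED_SHMEM pointer), WR_STATE_PRESENT, and lookup_app() are assumed from the surrounding BOINC scheduler:

static void scan_jobs_sketch() {
    int last_retval = 0;
    for (int i = 0; i < ssp->max_wu_results; i++) {
        WU_RESULT& wu_result = ssp->wu_results[i];
        if (wu_result.state != WR_STATE_PRESENT) continue;
        WORKUNIT wu = wu_result.workunit;   // mutable copy, as quick_check() expects
        APP* app = ssp->lookup_app(wu_result.workunit.appid);
        if (!app) continue;
        BEST_APP_VERSION* bavp = NULL;
        if (!quick_check(wu_result, wu, bavp, app, last_retval)) {
            continue;   // failed a fast screen; move on to the next job
        }
        // ... slower feasibility checks and actual job assignment go here
    }
}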
Example 2
// Assign a score to this job,
// representing the value of sending the job to this host.
// Also do some initial screening,
// and return false if we can't send the job to this host
//
bool JOB::get_score(WU_RESULT& wu_result) {
    score = 0;

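    // non-beta jobs flagged need_reliable can go only to hosts
    // whose app version is marked reliable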
    if (!app->beta && wu_result.need_reliable) {
        if (!bavp->reliable) {
            return false;
        }
    }

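    // beta jobs get a small bonus on hosts that opted into beta work;
    // hosts that didn't opt in never receive them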
    if (app->beta) {
        if (g_wreq->allow_beta_work) {
            score += 1;
        } else {
            return false;
        }
    }

    if (app_not_selected(wu_result.workunit)) {
        if (g_wreq->allow_non_preferred_apps) {
            score -= 1;
        } else {
            return false;
        }
    }

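    // favor jobs that other hosts found infeasible,
    // so they don't linger in the shared-memory array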
    if (wu_result.infeasible_count) {
        score += 1;
    }

    if (app->locality_scheduling == LOCALITY_SCHED_LITE
        && g_request->file_infos.size()
    ) {
        int n = nfiles_on_host(wu_result.workunit);
        if (config.debug_locality_lite) {
            log_messages.printf(MSG_NORMAL,
                "[loc_lite] job %s has %d files on this host\n",
                wu_result.workunit.name, n
            );
        }
        if (n > 0) {
            score += 10;
        }
    }

    if (app->n_size_classes > 1) {
        double effective_speed = bavp->host_usage.projected_flops
            * g_reply->host.on_frac * g_reply->host.active_frac;
        int target_size = get_size_class(*app, effective_speed);
        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] size: host %d job %d speed %f\n",
                target_size, wu_result.workunit.size_class, effective_speed
            );
        }
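        // prefer an exact size-class match; a job too big for this host
        // (-2) is worse than one smaller than ideal (-1)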
        if (target_size == wu_result.workunit.size_class) {
            score += 5;
        } else if (target_size < wu_result.workunit.size_class) {
            score -= 2;
        } else {
            score -= 1;
        }
    }
    if (config.debug_send) {
        log_messages.printf(MSG_NORMAL,
            "[send]: job score %f\n", score
        );
    }

    return true;
}
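
A hypothetical sketch of what might happen once each candidate JOB has passed get_score(): rank by score and send the most valuable jobs first. The std::vector of jobs and the sending step are assumptions, not the actual scheduler code:

#include <algorithm>
#include <vector>

// Rank candidates by score, highest first, then try to send them
// in that order until the client's work request is satisfied.
static void send_ranked_jobs_sketch(std::vector<JOB>& jobs) {
    std::sort(jobs.begin(), jobs.end(),
        [](const JOB& a, const JOB& b) { return a.score > b.score; }
    );
    for (JOB& job : jobs) {
        // ... try to send 'job'; stop once the request is satisfied
    }
}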