Ejemplo n.º 1
0
// return true if HR rules out sending any work to this host
//
bool hr_unknown_platform(HOST& host) {
    for (int i=0; i<ssp->napps; i++) {
        APP& app = ssp->apps[i];
        if (!hr_unknown_class(host, app_hr_type(app))) return false;
    }
    return true;
}
Ejemplo n.º 2
0
// check for HR compatibility
//
bool already_sent_to_different_hr_class(WORKUNIT& wu, APP& app) {
    g_wreq->hr_reject_temp = false;
    int host_hr_class = hr_class(g_request->host, app_hr_type(app));
    if (wu.hr_class && (host_hr_class != wu.hr_class)) {
        g_wreq->hr_reject_temp = true;
        return true;
    }
    return false;
}
Ejemplo n.º 3
0
// Do checks that require DB access for whether we can send this job,
// and return:
// 0 if OK to send
// 1 if can't send to this host
// 2 if can't send to ANY host
//
int slow_check(
    WU_RESULT& wu_result,       // the job cache entry.
        // We may refresh its hr_class and app_version_id fields.
    APP* app,
    BEST_APP_VERSION* bavp      // the app version to be used
) {
    int n, retval;
    DB_RESULT result;
    char buf[256];
    WORKUNIT& wu = wu_result.workunit;

    // Don't send if we've already sent a result of this WU to this user.
    //
    if (config.one_result_per_user_per_wu) {
        sprintf(buf,
            "where workunitid=%d and userid=%d", wu.id, g_reply->user.id
        );
        retval = result.count(n, buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "send_work: can't get result count (%s)\n", boincerror(retval)
            );
            return 1;
        } else {
            if (n>0) {
                if (config.debug_send) {
                    log_messages.printf(MSG_NORMAL,
                        "[send] [USER#%d] already has %d result(s) for [WU#%u]\n",
                        g_reply->user.id, n, wu.id
                    );
                }
                return 1;
            }
        }
    } else if (config.one_result_per_host_per_wu) {
        // Don't send if we've already sent a result of this WU to this host.
        // We only have to check this if we don't send one result per user.
        //
        sprintf(buf,
            "where workunitid=%d and hostid=%d", wu.id, g_reply->host.id
        );
        retval = result.count(n, buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "send_work: can't get result count (%s)\n", boincerror(retval)
            );
            return 1;
        } else {
            if (n>0) {
                if (config.debug_send) {
                    log_messages.printf(MSG_NORMAL,
                        "[send] [HOST#%d] already has %d result(s) for [WU#%u]\n",
                        g_reply->host.id, n, wu.id
                    );
                }
                return 1;
            }
        }
    }

    // Checks that require looking up the WU.
    // Lump these together so we only do 1 lookup
    //
    if (app_hr_type(*app) || app->homogeneous_app_version) {
        DB_WORKUNIT db_wu;
        db_wu.id = wu.id;
        int vals[3];
        retval = db_wu.get_field_ints(
            "hr_class, app_version_id, error_mask", 3, vals
        );
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "can't get fields for [WU#%u]: %s\n", db_wu.id, boincerror(retval)
            );
            return 1;
        }

        // check wu.error_mask
        //
        if (vals[2] != 0) {
            return 2;
        }

        if (app_hr_type(*app)) {
            wu.hr_class = vals[0];
            if (already_sent_to_different_hr_class(wu, *app)) {
                if (config.debug_send) {
                    log_messages.printf(MSG_NORMAL,
                        "[send] [HOST#%d] [WU#%u %s] is assigned to different HR class\n",
                        g_reply->host.id, wu.id, wu.name
                    );
                }
                // Mark the workunit as infeasible.
                // This ensures that jobs already assigned to an HR class
                // are processed first.
                //
                wu_result.infeasible_count++;
                return 1;
            }
        }
        if (app->homogeneous_app_version) {
            int wu_avid = vals[1];
            wu.app_version_id = wu_avid;
            if (wu_avid && wu_avid != bavp->avp->id) {
                if (config.debug_send) {
                    log_messages.printf(MSG_NORMAL,
                        "[send] [HOST#%d] [WU#%u %s] is assigned to different app version\n",
                        g_reply->host.id, wu.id, wu.name
                    );
                }
                wu_result.infeasible_count++;
                return 1;
            }
        }
    }
    return 0;
}
Ejemplo n.º 4
0
// Fast checks (no DB access) to see if the job can be sent to the host.
// Reasons why not include:
// 1) the host doesn't have enough memory;
// 2) the host doesn't have enough disk space;
// 3) based on CPU speed, resource share and estimated delay,
//    the host probably won't get the result done within the delay bound
// 4) app isn't in user's "approved apps" list
//
// If the job is feasible, return 0 and fill in wu.delay_bound
// with the delay bound we've decided to use.
//
int wu_is_infeasible_fast(
    WORKUNIT& wu,
    int res_server_state, int res_priority, double res_report_deadline,
    APP& app, BEST_APP_VERSION& bav
) {
    int retval;

    // project-specific check
    //
    if (wu_is_infeasible_custom(wu, app, bav)) {
        return INFEASIBLE_CUSTOM;
    }

    if (config.user_filter) {
        if (wu.batch && wu.batch != g_reply->user.id) {
            return INFEASIBLE_USER_FILTER;
        }
    }

    // homogeneous redundancy: can't send if app uses HR and
    // 1) host is of unknown HR class, or
    // 2) WU is already committed to different HR class
    //
    if (app_hr_type(app)) {
        if (hr_unknown_class(g_reply->host, app_hr_type(app))) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [HOST#%d] [WU#%u %s] host is of unknown class in HR type %d\n",
                    g_reply->host.id, wu.id, wu.name, app_hr_type(app)
                );
            }
            return INFEASIBLE_HR;
        }
        if (already_sent_to_different_hr_class(wu, app)) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [HOST#%d] [WU#%u %s] failed quick HR check: WU is class %d, host is class %d\n",
                    g_reply->host.id, wu.id, wu.name, wu.hr_class, hr_class(g_request->host, app_hr_type(app))
                );
            }
            return INFEASIBLE_HR;
        }
    }

    // homogeneous app version
    //
    if (app.homogeneous_app_version) {
        int avid = wu.app_version_id;
        if (avid && bav.avp->id != avid) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [HOST#%d] [WU#%u %s] failed homogeneous app version check: %d %d\n",
                    g_reply->host.id, wu.id, wu.name, avid, bav.avp->id
                );
            }
            return INFEASIBLE_HAV;
        }
    }

    if (config.one_result_per_user_per_wu || config.one_result_per_host_per_wu) {
        if (wu_already_in_reply(wu)) {
            return INFEASIBLE_DUP;
        }
    }

    retval = check_memory(wu);
    if (retval) return retval;
    retval = check_disk(wu);
    if (retval) return retval;
    retval = check_bandwidth(wu);
    if (retval) return retval;

    if (app.non_cpu_intensive) {
        return 0;
    }

    // do deadline check last because EDF sim uses some CPU
    //
    double opt, pess;
    get_delay_bound_range(
        wu, res_server_state, res_priority, res_report_deadline, bav, opt, pess
    );
    wu.delay_bound = (int)opt;
    if (opt == 0) {
        // this is a resend; skip deadline check
        return 0;
    }
    retval = check_deadline(wu, app, bav);
    if (retval && (opt != pess)) {
        wu.delay_bound = (int)pess;
        retval = check_deadline(wu, app, bav);
    }
    return retval;
}
// do slow checks (ones that require DB access)
//
static bool slow_check(WU_RESULT& wu_result, WORKUNIT& wu, APP* app) {
    int n, retval;
    DB_RESULT result;
    char buf[256];

    // Don't send if we've already sent a result of this WU to this user.
    //
    if (config.one_result_per_user_per_wu) {
        sprintf(buf,
            "where workunitid=%d and userid=%d",
            wu_result.workunit.id, g_reply->user.id
        );
        retval = result.count(n, buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "send_work: can't get result count (%d)\n", retval
            );
            return false;
        } else {
            if (n>0) {
                if (config.debug_send) {
                    log_messages.printf(MSG_NORMAL,
                        "[send] [USER#%d] already has %d result(s) for [WU#%d]\n",
                        g_reply->user.id, n, wu_result.workunit.id
                    );
                }
                return false;
            }
        }
    } else if (config.one_result_per_host_per_wu) {
        // Don't send if we've already sent a result
        // of this WU to this host.
        // We only have to check this
        // if we don't send one result per user.
        //
        sprintf(buf,
            "where workunitid=%d and hostid=%d",
            wu_result.workunit.id, g_reply->host.id
        );
        retval = result.count(n, buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "send_work: can't get result count (%d)\n", retval
            );
            return false;
        } else {
            if (n>0) {
                if (config.debug_send) {
                    log_messages.printf(MSG_NORMAL,
                        "[send] [HOST#%d] already has %d result(s) for [WU#%d]\n",
                        g_reply->host.id, n, wu_result.workunit.id
                    );
                }
                return false;
            }
        }
    }

    if (app_hr_type(*app)) {
        if (already_sent_to_different_platform_careful(
            wu_result.workunit, *app
        )) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [HOST#%d] [WU#%d %s] is assigned to different platform\n",
                    g_reply->host.id, wu.id, wu.name
                );
            }
            // Mark the workunit as infeasible.
            // This ensures that jobs already assigned to a platform
            // are processed first.
            //
            wu_result.infeasible_count++;
            return false;
        }
    }
    return true;
}
bool wu_is_infeasible_slow(
    WU_RESULT& wu_result, SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply
) {
    char buf[256];
    int retval;
    int n;
    DB_RESULT result;

    // Don't send if we've already sent a result of this WU to this user.
    //
    if (config.one_result_per_user_per_wu) {
        sprintf(buf,
            "where workunitid=%d and userid=%d",
            wu_result.workunit.id, g_reply->user.id
        );
        retval = result.count(n, buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "send_work: can't get result count (%d)\n", retval
            );
            return true;
        } else {
            if (n>0) {
                if (config.debug_send) {
                    log_messages.printf(MSG_NORMAL,
                        "[send] send_work: user %d already has %d result(s) for WU %d\n",
                        g_reply->user.id, n, wu_result.workunit.id
                    );
                }
                return true;
            }
        }
    } else if (config.one_result_per_host_per_wu) {
        // Don't send if we've already sent a result
        // of this WU to this host.
        // We only have to check this
        // if we don't send one result per user.
        //
        sprintf(buf,
            "where workunitid=%d and hostid=%d",
            wu_result.workunit.id, g_reply->host.id
        );
        retval = result.count(n, buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "send_work: can't get result count (%d)\n", retval
            );
            return true;
        } else {
            if (n>0) {
                if (config.debug_send) {
                    log_messages.printf(MSG_NORMAL,
                        "[send] send_work: host %d already has %d result(s) for WU %d\n",
                        g_reply->host.id, n, wu_result.workunit.id
                    );
                }
                return true;
            }
        }
    }

    APP* app = ssp->lookup_app(wu_result.workunit.appid);
    WORKUNIT wu = wu_result.workunit;
    if (app_hr_type(*app)) {
        if (already_sent_to_different_platform_careful(wu, *app)) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [HOST#%d] [WU#%d %s] WU is infeasible (assigned to different platform)\n",
                    g_reply->host.id, wu.id, wu.name
                );
            }
            // Mark the workunit as infeasible.
            // This ensures that jobs already assigned to a platform
            // are processed first.
            //
            wu_result.infeasible_count++;
            return true;
        }
    }
    return false;
}