// send a job for the given assignment
//
static int send_assigned_job(ASSIGNMENT& asg) {
    int retval;
    DB_WORKUNIT wu;
    char suffix[256], path[MAXPATHLEN];
    const char *rtfpath;
    static bool first=true;
    static int seqno=0;
    static R_RSA_PRIVATE_KEY key;
    BEST_APP_VERSION* bavp;
                                 
    if (first) {
        first = false;
        sprintf(path, "%s/upload_private", config.key_dir);
        retval = read_key_file(path, key);
        if (retval) {
            log_messages.printf(MSG_CRITICAL, "can't read key\n");
            return -1;
        }

    }
    retval = wu.lookup_id(asg.workunitid);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "assigned WU %d not found\n", asg.workunitid
        );
        return retval;
    }

    bavp = get_app_version(wu, false, false);
    if (!bavp) {
        log_messages.printf(MSG_CRITICAL,
            "App version for assigned WU not found\n"
        );
        return ERR_NOT_FOUND;
    }

    rtfpath = config.project_path("%s", wu.result_template_file);
    sprintf(suffix, "%d_%d_%d", getpid(), (int)time(0), seqno++);
    retval = create_result(
		wu, const_cast<char*>(rtfpath), suffix, key, config, 0, 0
	);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "[WU#%d %s] create_result(): %s\n", wu.id, wu.name, boincerror(retval)
        );
        return retval;
    }
    int result_id = boinc_db.insert_id();
    SCHED_DB_RESULT result;
    retval = result.lookup_id(result_id);
    add_result_to_reply(result, wu, bavp, false);

    if (config.debug_assignment) {
        log_messages.printf(MSG_NORMAL,
            "[assign] [WU#%d] [RESULT#%d] [HOST#%d] send assignment %d\n",
            wu.id, result_id, g_reply->host.id, asg.id
        );
    }
    return 0;
}
Beispiel #2
0
// Try to send the client this result
// This can fail because:
// - result needs more disk/mem/speed than host has
// - already sent a result for this WU
// - no app_version available
//
static int possibly_send_result(SCHED_DB_RESULT& result) {
    DB_WORKUNIT wu;
    SCHED_DB_RESULT result2;
    int retval;
    long count;
    char buf[256];
    BEST_APP_VERSION* bavp;

    g_wreq->no_jobs_available = false;

    retval = wu.lookup_id(result.workunitid);
    if (retval) return ERR_DB_NOT_FOUND;

    // This doesn't take into account g_wreq->allow_non_selected_apps,
    // however Einstein@Home, which is the only project that currently uses
    // this locality scheduler, doesn't support the respective project-specific
    // preference setting
    //
    if (app_not_selected(wu.appid)) return ERR_NO_APP_VERSION;

    bavp = get_app_version(wu, true, false);

    if (!config.locality_scheduler_fraction && !bavp && is_anonymous(g_request->platforms.list[0])) {
        char help_msg_buf[512];
        sprintf(help_msg_buf,
            "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.",
            config.long_name
        );
        g_reply->insert_message(help_msg_buf, "notice");
        g_reply->set_delay(DELAY_ANONYMOUS);
    }

    if (!bavp) return ERR_NO_APP_VERSION;

    APP* app = ssp->lookup_app(wu.appid);
    retval = wu_is_infeasible_fast(
        wu, result.server_state, result.report_deadline, result.priority,
        *app, *bavp
    );
    if (retval) return retval;

    if (config.one_result_per_user_per_wu) {
        sprintf(buf, "where userid=%lu and workunitid=%lu", g_reply->user.id, wu.id);
        retval = result2.count(count, buf);
        if (retval) return ERR_DB_NOT_FOUND;
        if (count > 0) return ERR_WU_USER_RULE;
    }

    return add_result_to_reply(result, wu, bavp, true);
}
int handle_result(DB_RESULT& result) {
    DB_WORKUNIT wu;
    int retval;
    char path[256];
    char buf[256];
    FILE* f;

    retval = wu.lookup_id(result.workunitid);
    if (retval) {
        printf(
            "ERROR: can't find WU %d for result %d\n",
            result.workunitid, result.id
        );
        return 1;
    }
    get_file_path(wu, path);
    f = fopen(path, "r");
    if (f) {
        fclose(f);
    } else {
        printf("no file %s for result %d\n",
               path, result.id
              );
        if (repair) {
            if (result.server_state == RESULT_SERVER_STATE_UNSENT) {
                result.server_state = RESULT_SERVER_STATE_OVER;
                result.outcome = RESULT_OUTCOME_COULDNT_SEND;
                sprintf(
                    buf,"server_state=%d, outcome=%d",
                    result.server_state, result.outcome
                );
                retval = result.update_field(buf);
                if (retval) {
                    printf(
                        "ERROR: can't update result %d\n",
                        result.id
                    );
                    return 1;
                }
            }
        }
        return 1;
    }
    return 0;
}
// Try to send the client this result
// This can fail because:
// - result needs more disk/mem/speed than host has
// - already sent a result for this WU
// - no app_version available
//
static int possibly_send_result(SCHED_DB_RESULT& result) {
    DB_WORKUNIT wu;
    SCHED_DB_RESULT result2;
    int retval, count;
    char buf[256];
    BEST_APP_VERSION* bavp;

    g_wreq->no_jobs_available = false;

    retval = wu.lookup_id(result.workunitid);
    if (retval) return ERR_DB_NOT_FOUND;

    bavp = get_app_version(wu, true, false);

    if (!config.locality_scheduler_fraction && !bavp && is_anonymous(g_request->platforms.list[0])) {
        char help_msg_buf[512];
        sprintf(help_msg_buf,
            "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.",
            config.long_name
        );
        g_reply->insert_message(help_msg_buf, "notice");
        g_reply->set_delay(DELAY_ANONYMOUS);
    }

    if (!bavp) return ERR_NO_APP_VERSION;

    APP* app = ssp->lookup_app(wu.appid);
    retval = wu_is_infeasible_fast(
        wu, result.server_state, result.report_deadline, result.priority,
        *app, *bavp
    );
    if (retval) return retval;

    if (config.one_result_per_user_per_wu) {
        sprintf(buf, "where userid=%d and workunitid=%d", g_reply->user.id, wu.id);
        retval = result2.count(count, buf);
        if (retval) return ERR_DB_NOT_FOUND;
        if (count > 0) return ERR_WU_USER_RULE;
    }

    return add_result_to_reply(result, wu, bavp, true);
}
// Try to send the client this result
// This can fail because:
// - result needs more disk/mem/speed than host has
// - already sent a result for this WU
// - no app_version available
//
static int possibly_send_result(DB_RESULT& result) {
    DB_WORKUNIT wu;
    DB_RESULT result2;
    int retval, count;
    char buf[256];
    BEST_APP_VERSION* bavp;

    retval = wu.lookup_id(result.workunitid);
    if (retval) return ERR_DB_NOT_FOUND;

    bavp = get_app_version(wu, true);

    if (!bavp && anonymous(g_request->platforms.list[0])) {
        char help_msg_buf[512];
        sprintf(help_msg_buf,
            "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.",
            config.long_name
        );
        g_reply->insert_message(USER_MESSAGE(help_msg_buf, "high"));
        g_reply->set_delay(DELAY_ANONYMOUS);
    }

    if (!bavp) return ERR_NO_APP_VERSION;

    APP* app = ssp->lookup_app(wu.appid);
    if (wu_is_infeasible_fast(wu, *app, *bavp)) {
        return ERR_INSUFFICIENT_RESOURCE;
    }

    if (config.one_result_per_user_per_wu) {
        sprintf(buf, "where userid=%d and workunitid=%d", g_reply->user.id, wu.id);
        retval = result2.count(count, buf);
        if (retval) return ERR_DB_NOT_FOUND;
        if (count > 0) return ERR_WU_USER_RULE;
    }

    return add_result_to_reply(result, wu, bavp, true);
}
// send non-multi assigned jobs
//
bool send_assigned_jobs() {
    DB_ASSIGNMENT asg;
    DB_RESULT result;
    DB_WORKUNIT wu;
    bool sent_something = false;
    int retval;

    // for now, only look for user assignments
    //
    char buf[256];
    sprintf(buf, "where target_type=%d and target_id=%d and multi=0",
        ASSIGN_USER, g_reply->user.id
    );
    while (!asg.enumerate(buf)) {
        if (!work_needed(false)) continue; 

        // if the WU doesn't exist, delete the assignment record.
        //
        retval = wu.lookup_id(asg.workunitid);
        if (retval) {
            asg.delete_from_db();
            continue;
        }
        // don't send if WU is validation pending or completed,
        // or has transition pending
        //
        if (wu.need_validate) continue;
        if (wu.canonical_resultid) continue;
        if (wu.transition_time < time(0)) continue;

        // don't send if we already sent one to this host
        //
        sprintf(buf, "where workunitid=%d and hostid=%d",
            asg.workunitid,
            g_request->host.id
        );
        retval = result.lookup(buf);
        if (retval != ERR_DB_NOT_FOUND) continue;

        // don't send if there's already one in progress to this user
        //
        sprintf(buf,
            "where workunitid=%d and userid=%d and server_state=%d",
            asg.workunitid,
            g_reply->user.id,
            RESULT_SERVER_STATE_IN_PROGRESS
        );
        retval = result.lookup(buf);
        if (retval != ERR_DB_NOT_FOUND) continue;

        // OK, send the job
        //
        retval = send_assigned_job(asg);
        if (retval) continue;

        sent_something = true;

        // update the WU's transition time to time out this job
        //
        retval = wu.lookup_id(asg.workunitid);
        if (retval) continue;
        int new_tt = time(0) + wu.delay_bound;
        if (new_tt < wu.transition_time) {
            char buf2[256];
            sprintf(buf2, "transition_time=%d", new_tt);
            wu.update_field(buf2);
        }
    }
    return sent_something;
}
Beispiel #7
0
// resend any jobs that:
// 1) we already sent to this host;
// 2) are still in progress (i.e. haven't timed out) and
// 3) aren't present on the host
// Return true if there were any such jobs
//
bool resend_lost_work() {
    SCHED_DB_RESULT result;
    std::vector<DB_RESULT>results;
    unsigned int i;
    char buf[256];
    char warning_msg[256];
    bool did_any = false;
    int num_eligible_to_resend=0;
    int num_resent=0;
    BEST_APP_VERSION* bavp = NULL;
    APP* app = NULL;
    int retval;

    sprintf(buf, " where hostid=%d and server_state=%d ",
        g_reply->host.id, RESULT_SERVER_STATE_IN_PROGRESS
    );
    while (!result.enumerate(buf)) {
        if (!work_needed(false)) {
            result.end_enumerate();
            break;
        }

        bool found = false;
        for (i=0; i<g_request->other_results.size(); i++) {
            OTHER_RESULT& orp = g_request->other_results[i];
            if (!strcmp(orp.name, result.name)) {
                found = true;
                break;
            }
        }
        if (found) continue;

        num_eligible_to_resend++;
        if (config.debug_resend) {
            log_messages.printf(MSG_NORMAL,
                "[resend] [HOST#%d] found lost [RESULT#%u]: %s\n",
                g_reply->host.id, result.id, result.name
            );
        }

        DB_WORKUNIT wu;
        bool can_resend = true;
        retval = wu.lookup_id(result.workunitid);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "[HOST#%d] can't resend - WU not found for [RESULT#%u]\n",
                g_reply->host.id, result.id
            );
            can_resend = false;
        }
        if (can_resend) {
            app = ssp->lookup_app(wu.appid);
            bavp = get_app_version(wu, true, false);
            if (!bavp) {
                if (config.debug_resend) {
                    log_messages.printf(MSG_NORMAL,
                        "[HOST#%d] can't resend [RESULT#%u]: no app version for %s\n",
                        g_reply->host.id, result.id, app->name
                    );
                }
                can_resend = false;
            }
        }
        if (can_resend && wu.error_mask) {
            if (config.debug_resend) {
                log_messages.printf(MSG_NORMAL,
                    "[resend] skipping [RESULT#%u]: WU error mask %d\n",
                    result.id, wu.error_mask
                );
            }
            can_resend = false;
        }
        if (can_resend && wu.canonical_resultid) {
            if (config.debug_resend) {
                log_messages.printf(MSG_NORMAL,
                    "[resend] skipping [RESULT#%u]: already have canonical result\n",
                    result.id
                );
            }
            can_resend = false;
        }
        if (can_resend && wu_is_infeasible_fast(
            wu, result.server_state, result.priority, result.report_deadline,
            *app, *bavp
        )) {
            if (config.debug_resend) {
                log_messages.printf(MSG_NORMAL,
                    "[resend] skipping [RESULT#%u]: feasibility check failed\n",
                    result.id
                );
            }
            can_resend = false;
        }
        if (can_resend && possibly_give_result_new_deadline(result, wu, *bavp)) {
            if (config.debug_resend) {
                log_messages.printf(MSG_NORMAL,
                    "[resend] skipping [RESULT#%u]: deadline assignment failed\n",
                    result.id
                );
            }
            can_resend = false;
        }

        // If we can't resend this job for any of the above reasons,
        // make it time out so that the transitioner does the right thing.
        //
        if (!can_resend) {
            result.report_deadline = time(0)-1;
            retval = result.mark_as_sent(result.server_state, config.report_grace_period);
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "resend_lost_work: can't update result deadline: %s\n",
                    boincerror(retval)
                );
                continue;
            }

            retval = update_wu_on_send(
                wu, result.report_deadline + config.report_grace_period,
                *app, *bavp
            );
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "resend_lost_result: can't update WU transition time: %s\n",
                    boincerror(retval)
                );
                continue;
            }
            sprintf(warning_msg,
                "Didn't resend lost task %s (expired)", result.name
            );
            g_reply->insert_message(warning_msg, "low");
        } else {
            retval = add_result_to_reply(result, wu, bavp, false);
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "[HOST#%d] failed to send [RESULT#%u]\n",
                    g_reply->host.id, result.id
                );
                continue;
            }
            sprintf(warning_msg, "Resent lost task %s", result.name);
            g_reply->insert_message(warning_msg, "low");
            num_resent++;
            did_any = true;

            if (g_wreq->njobs_sent >= config.max_wus_to_send) {
                result.end_enumerate();
                break;
            }
        }
    }

    if (num_eligible_to_resend && config.debug_resend) {
        log_messages.printf(MSG_NORMAL,
            "[resend] [HOST#%d] %d lost results, resent %d\n",
            g_reply->host.id, num_eligible_to_resend, num_resent 
        );
    }

    return did_any;
}
static int send_assigned_job(ASSIGNMENT& asg) {
    int retval;
    DB_WORKUNIT wu;
    char suffix[256], path[256], buf[256];
    const char *rtfpath;
    static bool first=true;
    static int seqno=0;
    static R_RSA_PRIVATE_KEY key;
    BEST_APP_VERSION* bavp;
                                 
    if (first) {
        first = false;
        sprintf(path, "%s/upload_private", config.key_dir);
        retval = read_key_file(path, key);
        if (retval) {
            log_messages.printf(MSG_CRITICAL, "can't read key\n");
            return -1;
        }

    }
    retval = wu.lookup_id(asg.workunitid);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "assigned WU %d not found\n", asg.workunitid
        );
        return retval;
    }

    bavp = get_app_version(wu);
    if (!bavp) {
        log_messages.printf(MSG_CRITICAL,
            "App version for assigned WU not found\n"
        );
        return ERR_NOT_FOUND;
    }

    rtfpath = config.project_path("%s", wu.result_template_file);
    sprintf(suffix, "%d_%d_%d", getpid(), (int)time(0), seqno++);
    retval = create_result(wu, (char *)rtfpath, suffix, key, config, 0, 0);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "[WU#%d %s] create_result() %d\n", wu.id, wu.name, retval
        );
        return retval;
    }
    int result_id = boinc_db.insert_id();
    DB_RESULT result;
    retval = result.lookup_id(result_id);
    add_result_to_reply(result, wu, bavp, false);

    // if this is a one-job assignment, fill in assignment.resultid
    // so that it doesn't get sent again
    //
    if (!asg.multi && asg.target_type!=ASSIGN_NONE) {
        DB_ASSIGNMENT db_asg;
        db_asg.id = asg.id;
        sprintf(buf, "resultid=%d", result_id);
        retval = db_asg.update_field(buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "assign update failed: %d\n", retval
            );
            return retval;
        }
        asg.resultid = result_id;
    }
    if (config.debug_assignment) {
        log_messages.printf(MSG_NORMAL,
            "[assign] [WU#%d] [RESULT#%d] [HOST#%d] send assignment %d\n",
            wu.id, result_id, g_reply->host.id, asg.id
        );
    }
    return 0;
}
Beispiel #9
0
// Send targeted jobs of a given type.
// NOTE: there may be an atomicity problem in the following.
// Ideally it should be in a transaction.
//
bool send_jobs(int assign_type) {
    DB_ASSIGNMENT asg;
    DB_RESULT result;
    DB_WORKUNIT wu;
    int retval;
    bool sent_something = false;
    char query[256];

    switch (assign_type) {
    case ASSIGN_USER:
        sprintf(query, "where target_type=%d and target_id=%lu and multi=0",
            ASSIGN_USER, g_reply->user.id
        );
        break;
    case ASSIGN_HOST:
        sprintf(query, "where target_type=%d and target_id=%lu and multi=0",
            ASSIGN_HOST, g_reply->host.id
        );
        break;
    case ASSIGN_TEAM:
        sprintf(query, "where target_type=%d and target_id=%lu and multi=0",
            ASSIGN_TEAM, g_reply->team.id
        );
        break;
    }

    while (!asg.enumerate(query)) {
        if (!work_needed(false)) {
            asg.end_enumerate();
            break;
        }

        // if the WU doesn't exist, delete the assignment record.
        //
        retval = wu.lookup_id(asg.workunitid);
        if (retval) {
            asg.delete_from_db();
            continue;
        }

        if (!need_targeted_instance(wu, g_reply->host.id)) {
            continue;
        }

        // OK, send the job
        //
        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "sending targeted job: %s\n", wu.name
            );
        }
        retval = send_assigned_job(asg);
        if (retval) {
            log_messages.printf(MSG_NORMAL,
                "failed to send targeted job: %s\n", boincerror(retval)
            );
            continue;
        }

        sent_something = true;

        // update the WU's transition time to time out this job
        //
        retval = wu.lookup_id(asg.workunitid);
        if (retval) continue;
        int new_tt = time(0) + wu.delay_bound;
        if (new_tt < wu.transition_time) {
            char buf2[256];
            sprintf(buf2, "transition_time=%d", new_tt);
            wu.update_field(buf2);
        }
    }
    return sent_something;
}