Ejemplo n.º 1
0
// Called when there's evidence that the host has detached.
// Marks every in-progress result of the given host
// as server state OVER, outcome CLIENT_DETACHED.
// This serves two purposes:
// 1) make sure we don't resend these results to the host
//    (they may be the reason the user detached)
// 2) trigger the generation of new results for these WUs
//
// host: the host whose in-progress results are closed out
//       (only host.id is read).
//
// The return values of the DB updates are not checked here;
// the function carries on with the remaining results regardless.
//
static void mark_results_over(DB_HOST& host) {
    char query[256], update[256];
    DB_RESULT result;

    snprintf(query, sizeof(query),
        "where hostid=%d and server_state=%d",
        host.id,
        RESULT_SERVER_STATE_IN_PROGRESS
    );
    while (!result.enumerate(query)) {
        // Sample the clock once per result so the result's received_time
        // and the workunit's transition_time carry the same timestamp.
        //
        time_t now = time(0);

        // time_t is not guaranteed to be a long,
        // so cast explicitly to match the %ld conversion.
        //
        snprintf(update, sizeof(update),
            "server_state=%d, outcome=%d, received_time = %ld",
            RESULT_SERVER_STATE_OVER,
            RESULT_OUTCOME_CLIENT_DETACHED,
            (long)now
        );
        result.update_field(update);

        // and trigger WU transition
        //
        DB_WORKUNIT wu;
        wu.id = result.workunitid;
        snprintf(update, sizeof(update), "transition_time=%d", (int)now);
        wu.update_field(update);

        log_messages.printf(MSG_CRITICAL,
            "[HOST#%d] [RESULT#%u] [WU#%u] changed CPID: marking in-progress result %s as client error!\n",
            host.id, result.id, result.workunitid, result.name
        );
    }
}
Ejemplo n.º 2
0
// Check whether the file named by get_file_path() for this result's
// workunit can be opened for reading.
//
// Returns 0 if the file could be opened, 1 in every other case
// (workunit lookup failed, file could not be opened,
// or the repair update failed).
//
// When the file cannot be opened and the global "repair" flag is set,
// a result that is still UNSENT is marked OVER / COULDNT_SEND
// in both the passed-in object and the DB.
//
int handle_result(DB_RESULT& result) {
    DB_WORKUNIT wu;
    char path[256];

    const int lookup_err = wu.lookup_id(result.workunitid);
    if (lookup_err) {
        printf(
            "ERROR: can't find WU %d for result %d\n",
            result.workunitid, result.id
        );
        return 1;
    }

    // Existence probe only: the file is opened and closed right away.
    //
    get_file_path(wu, path);
    FILE* probe = fopen(path, "r");
    if (probe) {
        fclose(probe);
        return 0;
    }

    printf("no file %s for result %d\n", path, result.id);

    // Nothing to repair unless asked to, and only unsent results are touched.
    //
    if (!repair || result.server_state != RESULT_SERVER_STATE_UNSENT) {
        return 1;
    }

    result.server_state = RESULT_SERVER_STATE_OVER;
    result.outcome = RESULT_OUTCOME_COULDNT_SEND;

    char update[256];
    sprintf(
        update, "server_state=%d, outcome=%d",
        result.server_state, result.outcome
    );
    const int update_err = result.update_field(update);
    if (update_err) {
        printf(
            "ERROR: can't update result %d\n",
            result.id
        );
    }

    // The file is missing either way, so report failure.
    //
    return 1;
}
Ejemplo n.º 3
0
// Arrange that further results for this workunit
// will be sent only to hosts with the given user ID.
// This could be used, for example, so that late workunits
// are sent only to cloud or cluster resources.
//
// Steps:
// 1) mark the workunit's unsent results as OVER / DIDNT_NEED
//    (failures of these per-result updates are not checked)
// 2) set the workunit's transitioner_flags to TRANSITION_NO_NEW_RESULTS
// 3) insert an assignment record targeting the user
//
// Returns 0 on success, else the error code from the workunit update
// or from the assignment insert.
//
int restrict_wu_to_user(WORKUNIT& _wu, int userid) {
    DB_WORKUNIT wu;
    wu = _wu;

    // mark unsent results as DIDNT_NEED;
    // the update clause is the same for every result, so format it once
    //
    char where_clause[256];
    sprintf(where_clause, "where workunitid=%d and server_state=%d",
        wu.id, RESULT_SERVER_STATE_UNSENT
    );
    char result_update[256];
    sprintf(result_update, "server_state=%d, outcome=%d",
        RESULT_SERVER_STATE_OVER,
        RESULT_OUTCOME_DIDNT_NEED
    );
    DB_RESULT result;
    while (!result.enumerate(where_clause)) {
        result.update_field(result_update);
    }

    // mark the WU as TRANSITION_NO_NEW_RESULTS
    //
    char wu_update[256];
    sprintf(wu_update, "transitioner_flags=%d", TRANSITION_NO_NEW_RESULTS);
    int retval = wu.update_field(wu_update);
    if (retval) {
        return retval;
    }

    // create an assignment record
    //
    DB_ASSIGNMENT asg;
    asg.clear();
    asg.create_time = time(0);
    asg.target_id = userid;
    asg.target_type = ASSIGN_USER;
    asg.multi = 0;
    asg.workunitid = wu.id;
    return asg.insert();
}
Ejemplo n.º 4
0
// Make one pass over workunits (if do_input_files) and results
// (if do_output_files) whose file_delete_state is READY,
// or ERROR when retry_error is set.
// For each row, delete its files (unless the matching preserve_* flag
// is set) and record the outcome as FILE_DELETE_DONE or FILE_DELETE_ERROR.
//
// The selection can be narrowed by the globals id_modulus/id_remainder,
// dont_delete_batches and appid.
//
// Returns true if we changed the file_delete_state of a WU or a result.
// Exits the process if an enumeration fails with anything other than
// ERR_DB_NOT_FOUND.
//
bool do_pass(bool retry_error) {
    DB_WORKUNIT wu;
    DB_RESULT result;
    bool did_something = false;
    char query[256];    // "where ..." clause handed to enumerate()
    char update[256];   // "field=value" clause handed to update_field()
    char clause[256];   // optional extra conditions shared by both queries
    int retval, new_state;

    check_stop_daemons();

    strcpy(clause, "");
    if (id_modulus) {
        snprintf(clause, sizeof(clause),
            " and id %% %d = %d ", id_modulus, id_remainder
        );
    }
    if (dont_delete_batches) {
        strcat(clause, " and batch <= 0 ");
    }
    if (appid) {
        char app_cond[64];
        snprintf(app_cond, sizeof(app_cond), " and appid = %d ", appid);
        strcat(clause, app_cond);
    }

    const int wanted_state =
        retry_error ? FILE_DELETE_ERROR : FILE_DELETE_READY;

    // The query text lives in its own buffer:
    // it must not be overwritten by the per-row update clause,
    // because the same pointer is passed to enumerate() on every iteration.
    //
    snprintf(query, sizeof(query),
        "where file_delete_state=%d %s limit %d",
        wanted_state, clause, WUS_PER_ENUM
    );

    while (do_input_files) {
        retval = wu.enumerate(query);
        if (retval) {
            if (retval != ERR_DB_NOT_FOUND) {
                log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n");
                exit(0);
            }
            break;
        }

        if (preserve_wu_files) {
            retval = 0;
        } else {
            retval = wu_delete_files(wu);
        }
        if (retval) {
            new_state = FILE_DELETE_ERROR;
            log_messages.printf(MSG_CRITICAL,
                "[WU#%d] file deletion failed: %s\n", wu.id, boincerror(retval)
            );
        } else {
            new_state = FILE_DELETE_DONE;
        }

        // Skip the DB write when the state would not change
        // (e.g. a retry pass that failed again).
        //
        if (new_state != wu.file_delete_state) {
            snprintf(update, sizeof(update), "file_delete_state=%d", new_state);
            retval = wu.update_field(update);
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "[WU#%d] update failed: %s\n", wu.id, boincerror(retval)
                );
            } else {
                log_messages.printf(MSG_DEBUG,
                    "[WU#%d] file_delete_state updated\n", wu.id
                );
                did_something = true;
            }
        }
    }

    snprintf(query, sizeof(query),
        "where file_delete_state=%d %s limit %d",
        wanted_state, clause, RESULTS_PER_ENUM
    );

    while (do_output_files) {
        retval = result.enumerate(query);
        if (retval) {
            if (retval != ERR_DB_NOT_FOUND) {
                log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n");
                exit(0);
            }
            break;
        }

        if (preserve_result_files) {
            retval = 0;
        } else {
            retval = result_delete_files(result);
        }
        if (retval) {
            new_state = FILE_DELETE_ERROR;
            log_messages.printf(MSG_CRITICAL,
                "[RESULT#%d] file deletion failed: %s\n", result.id, boincerror(retval)
            );
        } else {
            new_state = FILE_DELETE_DONE;
        }
        if (new_state != result.file_delete_state) {
            snprintf(update, sizeof(update), "file_delete_state=%d", new_state);
            retval = result.update_field(update);
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "[RESULT#%d] update failed: %s\n", result.id, boincerror(retval)
                );
            } else {
                log_messages.printf(MSG_DEBUG,
                    "[RESULT#%d] file_delete_state updated\n", result.id
                );
                did_something = true;
            }
        }
    }

    return did_something;
}
Ejemplo n.º 5
0
// Enumerate jobs from the DB until we find one
// that is not already in the work array.
// Returns true if one was found (it is then available through wi).
// Returns false if the end of the enumeration is reached
// while enum_phase is ENUM_SECOND_PASS; enum_phase is then set to ENUM_OVER.
//
// Jobs are skipped when their app ID is unknown to shared memory,
// when their workunit has an error mask (the result is then also marked
// OVER / DIDNT_NEED in the DB), when they are already in the work array
// (counted in ncollisions), or when hr_info rejects their HR class.
//
// Exits the process if enumeration fails with anything other than
// ERR_DB_NOT_FOUND.
//
static bool get_job_from_db(
    DB_WORK_ITEM& wi,    // enumerator to get job from
    int app_index,       // if using --allapps, the app index
    int& enum_phase,     // in/out: ENUM_FIRST_PASS -> ENUM_SECOND_PASS -> ENUM_OVER
    int& ncollisions     // in/out: jobs found that were already in the array
) {
    char select_clause[256];
    int enum_size;

    if (all_apps) {
        sprintf(select_clause, "%s and r1.appid=%lu",
            mod_select_clause, ssp->apps[app_index].id
        );
        enum_size = enum_sizes[app_index];
    } else {
        safe_strcpy(select_clause, mod_select_clause);
        enum_size = enum_limit;
    }
    int hrt = ssp->apps[app_index].homogeneous_redundancy;

    for (;;) {
        int retval;
        if (hrt && config.hr_allocate_slots) {
            retval = wi.enumerate_all(enum_size, select_clause);
        } else {
            retval = wi.enumerate(enum_size, select_clause, order_clause);
        }

        if (retval) {
            if (retval != ERR_DB_NOT_FOUND) {
                // If DB server dies, exit;
                // so /start (run from crontab) will restart us eventually.
                //
                log_messages.printf(MSG_CRITICAL,
                    "DB connection lost, exiting\n"
                );
                exit(0);
            }

            // we've reached the end of the result set:
            // advance the phase, and give up after the second pass
            //
            if (enum_phase == ENUM_FIRST_PASS) {
                enum_phase = ENUM_SECOND_PASS;
                // disregard collisions - maybe we'll find new jobs
                ncollisions = 0;
            } else if (enum_phase == ENUM_SECOND_PASS) {
                enum_phase = ENUM_OVER;
                return false;
            }
            log_messages.printf(MSG_NORMAL,
                "restarted enumeration for appid %lu\n",
                ssp->apps[app_index].id
            );
            continue;
        }

        // Check for invalid application ID
        //
        if (!ssp->lookup_app(wi.wu.appid)) {
#if 0
            log_messages.printf(MSG_CRITICAL,
                "result [RESULT#%u] has bad appid %d; clean up your DB!\n",
                wi.res_id, wi.wu.appid
            );
#endif
            continue;
        }

        // if the WU had an error, mark result as DIDNT_NEED
        //
        if (wi.wu.error_mask) {
            char update[256];
            DB_RESULT result;
            result.id = wi.res_id;
            sprintf(update, "server_state=%d, outcome=%d",
                RESULT_SERVER_STATE_OVER,
                RESULT_OUTCOME_DIDNT_NEED
            );
            result.update_field(update);
            log_messages.printf(MSG_NORMAL,
                "[RESULT#%lu] WU had error, marking as DIDNT_NEED\n",
                wi.res_id
            );
            continue;
        }

        // Check for collision (i.e. this result already is in the array)
        //
        bool already_in_array = false;
        for (int slot = 0; slot < ssp->max_wu_results; slot++) {
            if (ssp->wu_results[slot].state == WR_STATE_EMPTY) {
                continue;
            }
            if (ssp->wu_results[slot].resultid != wi.res_id) {
                continue;
            }

            // The result is already in shared mem.
            // If its WU has an HR class set and the slot has not been
            // found infeasible yet, bump the infeasible count
            // to encourage it to get sent more quickly.
            //
            if (ssp->wu_results[slot].infeasible_count == 0 && wi.wu.hr_class > 0) {
                ssp->wu_results[slot].infeasible_count++;
            }
            ncollisions++;
            already_in_array = true;
            log_messages.printf(MSG_DEBUG,
                "result [RESULT#%lu] already in array\n", wi.res_id
            );
            break;
        }
        if (already_in_array) {
            continue;
        }

        // if using HR, check whether we've exceeded quota for this class
        //
        if (hrt && config.hr_allocate_slots) {
            if (!hr_info.accept(hrt, wi.wu.hr_class)) {
                log_messages.printf(MSG_DEBUG,
                    "rejecting [RESULT#%lu] because HR class %d/%d over quota\n",
                    wi.res_id, hrt, wi.wu.hr_class
                );
                continue;
            }
        }

        // passed every filter: this job can go into the array
        //
        return true;
    }
    return false;   // never reached
}