// Called when there's evidence that the host has detached. // Mark in-progress results for the given host // as server state OVER, outcome CLIENT_DETACHED. // This serves two purposes: // 1) make sure we don't resend these results to the host // (they may be the reason the user detached) // 2) trigger the generation of new results for these WUs // static void mark_results_over(DB_HOST& host) { char buf[256], buf2[256]; DB_RESULT result; sprintf(buf, "where hostid=%d and server_state=%d", host.id, RESULT_SERVER_STATE_IN_PROGRESS ); while (!result.enumerate(buf)) { sprintf(buf2, "server_state=%d, outcome=%d, received_time = %ld", RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_CLIENT_DETACHED, time(0) ); result.update_field(buf2); // and trigger WU transition // DB_WORKUNIT wu; wu.id = result.workunitid; sprintf(buf2, "transition_time=%d", (int)time(0)); wu.update_field(buf2); log_messages.printf(MSG_CRITICAL, "[HOST#%d] [RESULT#%u] [WU#%u] changed CPID: marking in-progress result %s as client error!\n", host.id, result.id, result.workunitid, result.name ); } }
// Check that the file for the given result's workunit is present
// (the path is obtained from get_file_path()).
//
// Returns 0 if the file can be opened for reading.
// Returns 1 if the workunit can't be looked up or the file is missing.
// In the missing-file case, when the global "repair" flag is set and the
// result is still UNSENT, the result is marked OVER / COULDNT_SEND in the DB.
//
int handle_result(DB_RESULT& result) {
    DB_WORKUNIT wu;
    char path[256];

    if (wu.lookup_id(result.workunitid)) {
        printf(
            "ERROR: can't find WU %d for result %d\n",
            result.workunitid, result.id
        );
        return 1;
    }

    get_file_path(wu, path);

    // The file is only probed for existence; its contents are not read.
    //
    FILE* probe = fopen(path, "r");
    if (probe) {
        fclose(probe);
        return 0;
    }

    printf("no file %s for result %d\n", path, result.id);

    // Only unsent results are repaired, and only when repair is enabled.
    //
    if (!repair || result.server_state != RESULT_SERVER_STATE_UNSENT) {
        return 1;
    }

    result.server_state = RESULT_SERVER_STATE_OVER;
    result.outcome = RESULT_OUTCOME_COULDNT_SEND;

    char set_clause[256];
    sprintf(
        set_clause, "server_state=%d, outcome=%d",
        result.server_state, result.outcome
    );
    if (result.update_field(set_clause)) {
        printf("ERROR: can't update result %d\n", result.id);
    }
    return 1;
}
// Arrange that further results for this workunit // will be sent only to hosts with the given user ID. // This could be used, for example, so that late workunits // are sent only to cloud or cluster resources // int restrict_wu_to_user(WORKUNIT& _wu, int userid) { DB_RESULT result; DB_ASSIGNMENT asg; DB_WORKUNIT wu; wu = _wu; char buf[256]; int retval; // mark unsent results as DIDNT_NEED // sprintf(buf, "where workunitid=%d and server_state=%d", wu.id, RESULT_SERVER_STATE_UNSENT ); while (!result.enumerate(buf)) { char buf2[256]; sprintf(buf2, "server_state=%d, outcome=%d", RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_DIDNT_NEED ); result.update_field(buf2); } // mark the WU as TRANSITION_NO_NEW_RESULTS // sprintf(buf, "transitioner_flags=%d", TRANSITION_NO_NEW_RESULTS); retval = wu.update_field(buf); if (retval) return retval; // create an assignment record // asg.clear(); asg.create_time = time(0); asg.target_id = userid; asg.target_type = ASSIGN_USER; asg.multi = 0; asg.workunitid = wu.id; retval = asg.insert(); return retval; }
// return true if we changed the file_delete_state of a WU or a result // bool do_pass(bool retry_error) { DB_WORKUNIT wu; DB_RESULT result; bool did_something = false; char buf[256]; char clause[256]; int retval, new_state; check_stop_daemons(); strcpy(clause, ""); if (id_modulus) { sprintf(clause, " and id %% %d = %d ", id_modulus, id_remainder); } if (dont_delete_batches) { strcat(clause, " and batch <= 0 "); } if (appid) { sprintf(buf, " and appid = %d ", appid); strcat(clause, buf); } sprintf(buf, "where file_delete_state=%d %s limit %d", retry_error?FILE_DELETE_ERROR:FILE_DELETE_READY, clause, WUS_PER_ENUM ); while (do_input_files) { retval = wu.enumerate(buf); if (retval) { if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n"); exit(0); } break; } if (preserve_wu_files) { retval = 0; } else { retval = wu_delete_files(wu); } if (retval) { new_state = FILE_DELETE_ERROR; log_messages.printf(MSG_CRITICAL, "[WU#%d] file deletion failed: %s\n", wu.id, boincerror(retval) ); } else { new_state = FILE_DELETE_DONE; } if (new_state != wu.file_delete_state) { sprintf(buf, "file_delete_state=%d", new_state); retval = wu.update_field(buf); if (retval) { log_messages.printf(MSG_CRITICAL, "[WU#%d] update failed: %s\n", wu.id, boincerror(retval) ); } else { log_messages.printf(MSG_DEBUG, "[WU#%d] file_delete_state updated\n", wu.id ); did_something = true; } } } sprintf(buf, "where file_delete_state=%d %s limit %d", retry_error?FILE_DELETE_ERROR:FILE_DELETE_READY, clause, RESULTS_PER_ENUM ); while (do_output_files) { retval = result.enumerate(buf); if (retval) { if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n"); exit(0); } break; } if (preserve_result_files) { retval = 0; } else { retval = result_delete_files(result); } if (retval) { new_state = FILE_DELETE_ERROR; log_messages.printf(MSG_CRITICAL, "[RESULT#%d] file deletion failed: %s\n", result.id, boincerror(retval) ); } else { 
new_state = FILE_DELETE_DONE; } if (new_state != result.file_delete_state) { sprintf(buf, "file_delete_state=%d", new_state); retval = result.update_field(buf); if (retval) { log_messages.printf(MSG_CRITICAL, "[RESULT#%d] update failed: %s\n", result.id, boincerror(retval) ); } else { log_messages.printf(MSG_DEBUG, "[RESULT#%d] file_delete_state updated\n", result.id ); did_something = true; } } } return did_something; }
// Enumerate jobs from DB until find one that is not already in the work array. // If find one, return true. // If reach end of enum for second time on this array scan, return false // static bool get_job_from_db( DB_WORK_ITEM& wi, // enumerator to get job from int app_index, // if using --allapps, the app index int& enum_phase, int& ncollisions ) { bool collision; int retval, j, enum_size; char select_clause[256]; if (all_apps) { sprintf(select_clause, "%s and r1.appid=%lu", mod_select_clause, ssp->apps[app_index].id ); enum_size = enum_sizes[app_index]; } else { safe_strcpy(select_clause, mod_select_clause); enum_size = enum_limit; } int hrt = ssp->apps[app_index].homogeneous_redundancy; while (1) { if (hrt && config.hr_allocate_slots) { retval = wi.enumerate_all(enum_size, select_clause); } else { retval = wi.enumerate(enum_size, select_clause, order_clause); } if (retval) { if (retval != ERR_DB_NOT_FOUND) { // If DB server dies, exit; // so /start (run from crontab) will restart us eventually. 
// log_messages.printf(MSG_CRITICAL, "DB connection lost, exiting\n" ); exit(0); } // we've reach the end of the result set // switch (enum_phase) { case ENUM_FIRST_PASS: enum_phase = ENUM_SECOND_PASS; ncollisions = 0; // disregard collisions - maybe we'll find new jobs break; case ENUM_SECOND_PASS: enum_phase = ENUM_OVER; return false; } log_messages.printf(MSG_NORMAL, "restarted enumeration for appid %lu\n", ssp->apps[app_index].id ); } else { // Check for invalid application ID // if (!ssp->lookup_app(wi.wu.appid)) { #if 0 log_messages.printf(MSG_CRITICAL, "result [RESULT#%u] has bad appid %d; clean up your DB!\n", wi.res_id, wi.wu.appid ); #endif continue; } // if the WU had an error, mark result as DIDNT_NEED // if (wi.wu.error_mask) { char buf[256]; DB_RESULT result; result.id = wi.res_id; sprintf(buf, "server_state=%d, outcome=%d", RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_DIDNT_NEED ); result.update_field(buf); log_messages.printf(MSG_NORMAL, "[RESULT#%lu] WU had error, marking as DIDNT_NEED\n", wi.res_id ); continue; } // Check for collision (i.e. 
this result already is in the array) // collision = false; for (j=0; j<ssp->max_wu_results; j++) { if (ssp->wu_results[j].state != WR_STATE_EMPTY && ssp->wu_results[j].resultid == wi.res_id) { // If the result is already in shared mem, // and another instance of the WU has been sent, // bump the infeasible count to encourage // it to get sent more quickly // if (ssp->wu_results[j].infeasible_count == 0) { if (wi.wu.hr_class > 0) { ssp->wu_results[j].infeasible_count++; } } ncollisions++; collision = true; log_messages.printf(MSG_DEBUG, "result [RESULT#%lu] already in array\n", wi.res_id ); break; } } if (collision) { continue; } // if using HR, check whether we've exceeded quota for this class // if (hrt && config.hr_allocate_slots) { if (!hr_info.accept(hrt, wi.wu.hr_class)) { log_messages.printf(MSG_DEBUG, "rejecting [RESULT#%lu] because HR class %d/%d over quota\n", wi.res_id, hrt, wi.wu.hr_class ); continue; } } return true; } } return false; // never reached }