// One-off maintenance tool: for every workunit, look up one result of
// that workunit and the host the result refers to,
// then store OS(host) + CPU(host) in the workunit's workseq_next field.
//
// Returns nonzero if the database can't be opened, 0 otherwise.
//
int main() {
    if (boinc_db.open("predictor", "boinc", NULL, NULL)) {
        printf("Open failed\n");
        // report the failure to the caller instead of exiting "successfully"
        return 1;
    }

    DB_WORKUNIT workunit;
    char buf[256];
    while (!workunit.enumerate()) {
        printf("workunit %d wsn %d\n", workunit.id, workunit.workseq_next);

        // only the first matching result is looked at
        //
        DB_RESULT result;
        snprintf(buf, sizeof(buf), "where workunitid=%d", workunit.id);
        if (result.enumerate(buf)) continue;

        DB_HOST host;
        snprintf(buf, sizeof(buf), "where id=%d", result.hostid);
        if (host.enumerate(buf)) continue;

        workunit.workseq_next = OS(host) + CPU(host);
        if (workunit.update()) {
            printf("Update failed!\n");
        }
    }
    return 0;
}
// send a job for the given assignment // static int send_assigned_job(ASSIGNMENT& asg) { int retval; DB_WORKUNIT wu; char suffix[256], path[MAXPATHLEN]; const char *rtfpath; static bool first=true; static int seqno=0; static R_RSA_PRIVATE_KEY key; BEST_APP_VERSION* bavp; if (first) { first = false; sprintf(path, "%s/upload_private", config.key_dir); retval = read_key_file(path, key); if (retval) { log_messages.printf(MSG_CRITICAL, "can't read key\n"); return -1; } } retval = wu.lookup_id(asg.workunitid); if (retval) { log_messages.printf(MSG_CRITICAL, "assigned WU %d not found\n", asg.workunitid ); return retval; } bavp = get_app_version(wu, false, false); if (!bavp) { log_messages.printf(MSG_CRITICAL, "App version for assigned WU not found\n" ); return ERR_NOT_FOUND; } rtfpath = config.project_path("%s", wu.result_template_file); sprintf(suffix, "%d_%d_%d", getpid(), (int)time(0), seqno++); retval = create_result( wu, const_cast<char*>(rtfpath), suffix, key, config, 0, 0 ); if (retval) { log_messages.printf(MSG_CRITICAL, "[WU#%d %s] create_result(): %s\n", wu.id, wu.name, boincerror(retval) ); return retval; } int result_id = boinc_db.insert_id(); SCHED_DB_RESULT result; retval = result.lookup_id(result_id); add_result_to_reply(result, wu, bavp, false); if (config.debug_assignment) { log_messages.printf(MSG_NORMAL, "[assign] [WU#%d] [RESULT#%d] [HOST#%d] send assignment %d\n", wu.id, result_id, g_reply->host.id, asg.id ); } return 0; }
// Called when there's evidence that the host has detached. // Mark in-progress results for the given host // as server state OVER, outcome CLIENT_DETACHED. // This serves two purposes: // 1) make sure we don't resend these results to the host // (they may be the reason the user detached) // 2) trigger the generation of new results for these WUs // static void mark_results_over(DB_HOST& host) { char buf[256], buf2[256]; DB_RESULT result; sprintf(buf, "where hostid=%d and server_state=%d", host.id, RESULT_SERVER_STATE_IN_PROGRESS ); while (!result.enumerate(buf)) { sprintf(buf2, "server_state=%d, outcome=%d, received_time = %ld", RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_CLIENT_DETACHED, time(0) ); result.update_field(buf2); // and trigger WU transition // DB_WORKUNIT wu; wu.id = result.workunitid; sprintf(buf2, "transition_time=%d", (int)time(0)); wu.update_field(buf2); log_messages.printf(MSG_CRITICAL, "[HOST#%d] [RESULT#%u] [WU#%u] changed CPID: marking in-progress result %s as client error!\n", host.id, result.id, result.workunitid, result.name ); } }
// Insert a copy of original_wu into the database under a fresh name.
//
// original_wu:  the workunit to clone; on return its id field holds
//               the DB insert id of the newly created workunit
// starting_xml: the XML document of the original workunit
// start_time:   used to build unique names for the copy and its files
//
// Exits the process if the insert fails.
//
void make_new_wu(DB_WORKUNIT& original_wu, char* starting_xml, int start_time) {
    char file_name[256], buf[BLOB_SIZE], new_file_name[256];
    char new_buf[BLOB_SIZE];
    char * p;
    int retval;
    DB_WORKUNIT wu = original_wu;
    static int file_seqno = 0, wu_seqno = 0;

    // strtok() modifies its input, so scan a private copy
    //
    safe_strcpy(buf, starting_xml);
    p = strtok(buf, "\n");
    strcpy(file_name, "");

    // make new names for the WU's input files,
    // so clients will download them.
    // (don't actually copy files; URL stays the same)
    //
    // NOTE(review): every match starts again from starting_xml,
    // so it looks like wu.xml_doc ends up reflecting only the last
    // <name> processed - confirm against replace_file_name().
    //
    while (p) {
        if (parse_str(p, "<name>", file_name, sizeof(file_name))) {
            // file_name may be up to 255 chars; bound the write so the
            // added suffix can't run past the end of new_file_name
            //
            snprintf(
                new_file_name, sizeof(new_file_name), "%s__%d_%d",
                file_name, start_time, file_seqno++
            );
            safe_strcpy(new_buf, starting_xml);
            replace_file_name(new_buf, file_name, new_file_name);
            safe_strcpy(wu.xml_doc, new_buf);
        }
        p = strtok(0, "\n");
    }

    // set various fields for new WU (all others are copied)
    //
    wu.id = 0;
    wu.create_time = time(0);

    // the name of the new WU cannot include the original WU name,
    // because the original one probably contains "nodelete",
    // but we want the copy to be eligible for file deletion
    //
    sprintf(wu.name, "wu_%d_%d", start_time, wu_seqno++);
    wu.need_validate = false;
    wu.canonical_resultid = 0;
    wu.canonical_credit = 0;
    wu.hr_class = 0;
    wu.transition_time = time(0);
    wu.error_mask = 0;
    wu.file_delete_state = FILE_DELETE_INIT;
    wu.assimilate_state = ASSIMILATE_INIT;

    retval = wu.insert();
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "Failed to created WU: %s; exiting\n", boincerror(retval)
        );
        exit(retval);
    }
    original_wu.id = boinc_db.insert_id();
    log_messages.printf(MSG_DEBUG,
        "Created %s, clone of %s\n", wu.name, original_wu.name
    );
}
// Assimilator main loop (never returns normally).
//
// Each pass picks a workunit of the given app that is ready for
// assimilation, derives the task it belongs to from the workunit name,
// collects the canonical results of the task's workunits,
// and once the number of results equals the task size
// merges them with rmerge() and (if update_db is set)
// marks the task's workunits as assimilated.
//
int main_loop(APP& app) {
    DB_WORKUNIT wu;
    DB_RESULT result;
    char buf[256];
    char buf2[256];
    int retval;
    task_t task;

    // task.name is read below even when sscanf() matches nothing,
    // so it must start out zeroed
    //
    memset(&task, 0, sizeof(task));

    while (1) {
        check_stop_daemons();

        // fill in the fields of the current workunit
        //
        snprintf(buf, sizeof(buf), "where appid=%d and assimilate_state=%d",
            app.id, ASSIMILATE_READY
        );
        retval = wu.enumerate(buf);
        if (retval) {
            if (retval != ERR_DB_NOT_FOUND) {
                log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n");
                exit(0);
            }
        }

        // fill in the fields of the current task from the workunit name
        //
        // NOTE(review): the %[^_] conversions are unbounded; the sizes of
        // task.app_name / task.name are not visible here - confirm that
        // workunit names can't exceed them.
        //
        sscanf(wu.name, "%[^_]_%[^_]_%d_%*d_%d",
            task.app_name, task.name, &task.timestamp, &task.size
        );

        // build the list of results of this task
        //
        vector<RESULT> results;
        if (strlen(task.name) > 0) {
            snprintf(buf, sizeof(buf),
                "INNER JOIN workunit ON result.id = workunit.canonical_resultid WHERE workunit.name like \"%%_%s_%%\" and workunit.assimilate_state=%d",
                task.name, ASSIMILATE_READY
            );
            while (!result.enumerate(buf)) {
                results.push_back(result);
            }
        }

        // merge the results once all of them are available
        //
        if ((results.size() == task.size) && (task.size != 0)) {
            log_messages.printf(MSG_NORMAL, "[%s] Assimilating task\n", task.name);
            retval = rmerge(task, results);
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "[%s] Assimilation failed\n", task.name
                );
            } else {
                // update the DB records
                //
                if (update_db) {
                    snprintf(buf, sizeof(buf),
                        "assimilate_state=%d, transition_time=%d",
                        ASSIMILATE_DONE, (int)time(0)
                    );
                    snprintf(buf2, sizeof(buf2),
                        "appid=%d and assimilate_state=%d and name like \"%%_%s_%%\"",
                        app.id, ASSIMILATE_READY, task.name
                    );
                    wu.update_fields_noid(buf, buf2);
                    boinc_db.commit_transaction();
                }
                log_messages.printf(MSG_NORMAL,
                    "[%s] Task assimilated\n", task.name
                );

                // clear all structures
                //
                wu.clear();
                memset(&task, 0, sizeof(task));
                results.clear();
            }
        }
        sleep(SLEEP_INTERVAL);
    }
}
// We're purging this item because it's been in shared mem too long. // In general it will get added again soon. // But if it's committed to an HR class, // it could be because it got sent to a rare host. // Un-commit it by zeroing out the WU's hr class, // and incrementing target_nresults // static void purge_stale(WU_RESULT& wu_result) { DB_WORKUNIT wu; wu.id = wu_result.workunit.id; if (wu_result.workunit.hr_class) { char buf[256]; sprintf(buf, "hr_class=0, target_nresults=target_nresults+1, transition_time=%ld", time(0) ); wu.update_field(buf); } }
// collect information and call delete_antiques_from_dir() // for every relevant directory // static int delete_antiques() { DB_WORKUNIT wu; time_t t = 0; int ret = 0; // t = min (create_time_of_oldest_wu, 31days_ago) t = time(0) - 32*86400; if (!wu.enumerate("order by id limit 1") && (t > wu.create_time)) { t = wu.create_time - 86400; } // find numerical userid of apache struct passwd *apache_info = getpwnam(config.httpd_user); if (!apache_info) { log_messages.printf(MSG_CRITICAL, "Couldn't find http_user '%s' in passwd\n", config.httpd_user ); return -1; } log_messages.printf(MSG_DEBUG, "delete_antiques(): " "Deleting files older than epoch %lu (%s) with userid %u\n", (unsigned long)t, actime(t), apache_info->pw_uid ); // if fanout is configured, scan every fanout directory, // else just the plain upload directory if (config.uldl_dir_fanout) { for(int d = 0; d < config.uldl_dir_fanout; d++) { char buf[270]; snprintf(buf, sizeof(buf), "%s/%x", config.upload_dir, d); log_messages.printf(MSG_DEBUG, "delete_antiques(): scanning upload fanout directory '%s'\n", buf ); ret = delete_antiques_from_dir(buf, t, apache_info->pw_uid); if (ret < 0) return ret; } } else { log_messages.printf(MSG_DEBUG, "delete_antiques(): scanning upload directory '%s'\n", config.upload_dir ); ret = delete_antiques_from_dir(config.upload_dir, t, apache_info->pw_uid); } return ret; }
// create one new job // int make_job(int node,int sub_node) { DB_WORKUNIT wu; char name[256], path[MAXPATHLEN]; const char* infiles[2]; int retval; // make a unique name (for the job and its input file) // sprintf(name, "%s_%d_%d_%d", app_name,start_time, node,sub_node); // Create the input file. // Put it at the right place in the download dir hierarchy // retval = config.download_path(name, path); if (retval) return retval; FILE* f = fopen(path, "w"); if (!f) return ERR_FOPEN; //no:of vertices node_to_work fprintf(f,"%d %d %d\n",n1,node,sub_node); fclose(f); // Fill in the job parameters // wu.clear(); wu.appid = app.id; strcpy(wu.name, name); wu.rsc_fpops_est = n1*1e10; wu.rsc_fpops_bound = 1e24; wu.rsc_memory_bound = 1e8; wu.rsc_disk_bound = 1e8; wu.delay_bound = 30*n1; wu.min_quorum = REPLICATION_FACTOR; wu.target_nresults = REPLICATION_FACTOR; wu.max_error_results = REPLICATION_FACTOR*4; wu.max_total_results = REPLICATION_FACTOR*8; wu.max_success_results = REPLICATION_FACTOR*4; infiles[0] = name; infiles[1] = graphs; // Register the job with BOINC // sprintf(path, "templates/%s", out_template_file); return create_work( wu, in_template, path, config.project_path(path), infiles, 2, config ); }
// Try to send the client this result // This can fail because: // - result needs more disk/mem/speed than host has // - already sent a result for this WU // - no app_version available // static int possibly_send_result(SCHED_DB_RESULT& result) { DB_WORKUNIT wu; SCHED_DB_RESULT result2; int retval; long count; char buf[256]; BEST_APP_VERSION* bavp; g_wreq->no_jobs_available = false; retval = wu.lookup_id(result.workunitid); if (retval) return ERR_DB_NOT_FOUND; // This doesn't take into account g_wreq->allow_non_selected_apps, // however Einstein@Home, which is the only project that currently uses // this locality scheduler, doesn't support the respective project-specific // preference setting // if (app_not_selected(wu.appid)) return ERR_NO_APP_VERSION; bavp = get_app_version(wu, true, false); if (!config.locality_scheduler_fraction && !bavp && is_anonymous(g_request->platforms.list[0])) { char help_msg_buf[512]; sprintf(help_msg_buf, "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.", config.long_name ); g_reply->insert_message(help_msg_buf, "notice"); g_reply->set_delay(DELAY_ANONYMOUS); } if (!bavp) return ERR_NO_APP_VERSION; APP* app = ssp->lookup_app(wu.appid); retval = wu_is_infeasible_fast( wu, result.server_state, result.report_deadline, result.priority, *app, *bavp ); if (retval) return retval; if (config.one_result_per_user_per_wu) { sprintf(buf, "where userid=%lu and workunitid=%lu", g_reply->user.id, wu.id); retval = result2.count(count, buf); if (retval) return ERR_DB_NOT_FOUND; if (count > 0) return ERR_WU_USER_RULE; } return add_result_to_reply(result, wu, bavp, true); }
// Read the credit for a workunit from the <credit> element
// of its xml_doc field in the database.
//
// On success stores the value in credit and returns 0.
// Returns ERR_XML_PARSE if the field can't be fetched
// or contains no <credit> element.
// The results vector is unused.
//
int get_credit_from_wu(WORKUNIT& wu, vector<RESULT>&, double& credit) {
    DB_WORKUNIT dbwu;
    dbwu.id = wu.id;

    // fetch just the xml_doc column for this workunit
    //
    int status = dbwu.get_field_str(
        "xml_doc", dbwu.xml_doc, sizeof(dbwu.xml_doc)
    );
    if (status) {
        return ERR_XML_PARSE;
    }

    double parsed;
    if (!parse_double(dbwu.xml_doc, "<credit>", parsed)) {
        return ERR_XML_PARSE;
    }
    credit = parsed;
    return 0;
}
JOB_DESC() { wu.clear(); command_line = NULL; assign_flag = false; assign_multi = false; strcpy(wu_template_file, ""); strcpy(result_template_file, ""); assign_id = 0; assign_type = ASSIGN_NONE; // defaults (in case they're not in WU template) // wu.id = 0; wu.min_quorum = DEFAULT_MIN_QUORUM; wu.target_nresults = DEFAULT_TARGET_NRESULTS; wu.max_error_results = DEFAULT_MAX_ERROR_RESULTS; wu.max_total_results = DEFAULT_MAX_TOTAL_RESULTS; wu.max_success_results = DEFAULT_MAX_SUCCESS_RESULTS; wu.rsc_fpops_est = DEFAULT_RSC_FPOPS_EST; wu.rsc_fpops_bound = DEFAULT_RSC_FPOPS_BOUND; wu.rsc_memory_bound = DEFAULT_RSC_MEMORY_BOUND; wu.rsc_disk_bound = DEFAULT_RSC_DISK_BOUND; wu.rsc_bandwidth_bound = 0.0; // Not used wu.delay_bound = DEFAULT_DELAY_BOUND; }
// cancel a particular job // int cancel_job(DB_WORKUNIT& wu) { DB_RESULT result; char set_clause[256], where_clause[256]; int retval; // cancel unsent results // sprintf(set_clause, "server_state=%d, outcome=%d", RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_DIDNT_NEED ); sprintf(where_clause, "server_state<=%d and workunitid=%lu", RESULT_SERVER_STATE_UNSENT, wu.id ); retval = result.update_fields_noid(set_clause, where_clause); if (retval) return retval; // cancel the workunit // sprintf(set_clause, "error_mask=error_mask|%d, transition_time=%d", WU_ERROR_CANCELLED, (int)(time(0)) ); retval = wu.update_field(set_clause); if (retval) return retval; return 0; }
// Set up an empty job description: cleared workunit carrying built-in
// default parameters, no command line, empty result template name and
// additional XML, and no assignment.
//
JOB_DESC() {
    // workunit: start cleared, then apply defaults
    // (in case they're not in WU template)
    //
    wu.clear();
    wu.id = 0;

    // replication / retry limits
    wu.min_quorum = 2;
    wu.target_nresults = 2;
    wu.max_error_results = 3;
    wu.max_total_results = 10;
    wu.max_success_results = 6;

    // resource estimates and bounds
    wu.rsc_fpops_est = 3600e9;
    wu.rsc_fpops_bound = 86400e9;
    wu.rsc_memory_bound = 5e8;
    wu.rsc_disk_bound = 1e9;
    wu.rsc_bandwidth_bound = 0.0;
    wu.delay_bound = 7*86400;

    // no command line, template or extra XML yet
    //
    command_line = NULL;
    strcpy(result_template_file, "");
    strcpy(additional_xml, "");

    // not an assigned job
    //
    assign_flag = false;
    assign_multi = false;
    assign_id = 0;
    assign_type = ASSIGN_NONE;
}
// Check that the file of the result's workunit (as located by
// get_file_path()) can be opened for reading.
//
// Returns 0 if the file exists; 1 if the workunit can't be found
// or the file is missing.
// When the file is missing and the global 'repair' flag is set,
// an UNSENT result is changed to OVER / COULDNT_SEND in the database.
//
int handle_result(DB_RESULT& result) {
    DB_WORKUNIT wu;
    char path[256];

    if (wu.lookup_id(result.workunitid)) {
        printf(
            "ERROR: can't find WU %d for result %d\n",
            result.workunitid, result.id
        );
        return 1;
    }

    get_file_path(wu, path);
    FILE* fp = fopen(path, "r");
    if (fp) {
        // file is there; nothing to do
        fclose(fp);
        return 0;
    }

    printf("no file %s for result %d\n",
        path, result.id
    );

    // only repair results that haven't been sent yet
    //
    if (!repair) {
        return 1;
    }
    if (result.server_state != RESULT_SERVER_STATE_UNSENT) {
        return 1;
    }

    result.server_state = RESULT_SERVER_STATE_OVER;
    result.outcome = RESULT_OUTCOME_COULDNT_SEND;

    char update_clause[256];
    sprintf(
        update_clause,"server_state=%d, outcome=%d",
        result.server_state, result.outcome
    );
    if (result.update_field(update_clause)) {
        printf(
            "ERROR: can't update result %d\n",
            result.id
        );
    }
    return 1;
}
// Try to send the client this result // This can fail because: // - result needs more disk/mem/speed than host has // - already sent a result for this WU // - no app_version available // static int possibly_send_result(SCHED_DB_RESULT& result) { DB_WORKUNIT wu; SCHED_DB_RESULT result2; int retval, count; char buf[256]; BEST_APP_VERSION* bavp; g_wreq->no_jobs_available = false; retval = wu.lookup_id(result.workunitid); if (retval) return ERR_DB_NOT_FOUND; bavp = get_app_version(wu, true, false); if (!config.locality_scheduler_fraction && !bavp && is_anonymous(g_request->platforms.list[0])) { char help_msg_buf[512]; sprintf(help_msg_buf, "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.", config.long_name ); g_reply->insert_message(help_msg_buf, "notice"); g_reply->set_delay(DELAY_ANONYMOUS); } if (!bavp) return ERR_NO_APP_VERSION; APP* app = ssp->lookup_app(wu.appid); retval = wu_is_infeasible_fast( wu, result.server_state, result.report_deadline, result.priority, *app, *bavp ); if (retval) return retval; if (config.one_result_per_user_per_wu) { sprintf(buf, "where userid=%d and workunitid=%d", g_reply->user.id, wu.id); retval = result2.count(count, buf); if (retval) return ERR_DB_NOT_FOUND; if (count > 0) return ERR_WU_USER_RULE; } return add_result_to_reply(result, wu, bavp, true); }
// Arrange that further results for this workunit // will be sent only to hosts with the given user ID. // This could be used, for example, so that late workunits // are sent only to cloud or cluster resources // int restrict_wu_to_user(WORKUNIT& _wu, int userid) { DB_RESULT result; DB_ASSIGNMENT asg; DB_WORKUNIT wu; wu = _wu; char buf[256]; int retval; // mark unsent results as DIDNT_NEED // sprintf(buf, "where workunitid=%d and server_state=%d", wu.id, RESULT_SERVER_STATE_UNSENT ); while (!result.enumerate(buf)) { char buf2[256]; sprintf(buf2, "server_state=%d, outcome=%d", RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_DIDNT_NEED ); result.update_field(buf2); } // mark the WU as TRANSITION_NO_NEW_RESULTS // sprintf(buf, "transitioner_flags=%d", TRANSITION_NO_NEW_RESULTS); retval = wu.update_field(buf); if (retval) return retval; // create an assignment record // asg.clear(); asg.create_time = time(0); asg.target_id = userid; asg.target_type = ASSIGN_USER; asg.multi = 0; asg.workunitid = wu.id; retval = asg.insert(); return retval; }
// Try to send the client this result // This can fail because: // - result needs more disk/mem/speed than host has // - already sent a result for this WU // - no app_version available // static int possibly_send_result(DB_RESULT& result) { DB_WORKUNIT wu; DB_RESULT result2; int retval, count; char buf[256]; BEST_APP_VERSION* bavp; retval = wu.lookup_id(result.workunitid); if (retval) return ERR_DB_NOT_FOUND; bavp = get_app_version(wu, true); if (!bavp && anonymous(g_request->platforms.list[0])) { char help_msg_buf[512]; sprintf(help_msg_buf, "To get more %s work, finish current work, stop BOINC, remove app_info.xml file, and restart.", config.long_name ); g_reply->insert_message(USER_MESSAGE(help_msg_buf, "high")); g_reply->set_delay(DELAY_ANONYMOUS); } if (!bavp) return ERR_NO_APP_VERSION; APP* app = ssp->lookup_app(wu.appid); if (wu_is_infeasible_fast(wu, *app, *bavp)) { return ERR_INSUFFICIENT_RESOURCE; } if (config.one_result_per_user_per_wu) { sprintf(buf, "where userid=%d and workunitid=%d", g_reply->user.id, wu.id); retval = result2.count(count, buf); if (retval) return ERR_DB_NOT_FOUND; if (count > 0) return ERR_WU_USER_RULE; } return add_result_to_reply(result, wu, bavp, true); }
// Cancel every job whose workunit ID lies in [min_id, max_id]:
// unsent results become OVER / DIDNT_NEED, and the workunits get the
// cancelled error bit plus an immediate transition time.
// Returns 0 on success, otherwise the error of the failing DB update.
//
int cancel_jobs(int min_id, int max_id) {
    DB_WORKUNIT wu;
    DB_RESULT result;
    char updates[256], condition[256];
    int status;

    // results first
    //
    sprintf(updates, "server_state=%d, outcome=%d",
        RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_DIDNT_NEED
    );
    sprintf(condition,
        "server_state=%d and workunitid >=%d and workunitid<= %d",
        RESULT_SERVER_STATE_UNSENT, min_id, max_id
    );
    status = result.update_fields_noid(updates, condition);
    if (status) {
        return status;
    }

    // then the workunits themselves
    //
    sprintf(updates, "error_mask=error_mask|%d, transition_time=%d",
        WU_ERROR_CANCELLED, (int)(time(0))
    );
    sprintf(condition, "id>=%d and id<=%d", min_id, max_id);
    status = wu.update_fields_noid(updates, condition);
    if (status) {
        return status;
    }
    return 0;
}
// returns number of files found & added, or negative for error. // int find_antique_files() { char buf[256]; DB_WORKUNIT wu; check_stop_daemons(); // Find the oldest workunit. We could add // "where file_delete_state!=FILE_DELETE_DONE" to the query, // but this might create some race condition // with the 'regular' file delete mechanism, // so better to do it like this. // sprintf(buf, "order by id limit 1"); if (!wu.enumerate(buf)) { // Don't ever delete files younger than a month. // int days = 1 + (time(0) - wu.create_time)/86400; if (days<31) days=31; return add_antiques_to_list(days); } return 0; }
void JOB_DESC::create() { char buf[256]; int retval = create_work2( wu, wu_template, result_template_file, result_template_path, infiles, config, command_line, additional_xml ); if (retval) { fprintf(stderr, "create_work: %s\n", boincerror(retval)); exit(1); } if (assign_flag) { DB_ASSIGNMENT assignment; assignment.clear(); assignment.create_time = time(0); assignment.target_id = assign_id; assignment.target_type = assign_type; assignment.multi = assign_multi; assignment.workunitid = wu.id; retval = assignment.insert(); if (retval) { fprintf(stderr, "assignment.insert() failed: %s\n", boincerror(retval) ); exit(1); } sprintf(buf, "transitioner_flags=%d", assign_multi?TRANSITION_NONE:TRANSITION_NO_NEW_RESULTS ); retval = wu.update_field(buf); if (retval) { fprintf(stderr, "wu.update() failed: %s\n", boincerror(retval)); exit(1); } } }
int create_work( DB_WORKUNIT& wu, const char* _wu_template, const char* result_template_filename, const char* result_template_filepath, const char** infiles, int ninfiles, SCHED_CONFIG& config_loc, const char* command_line, const char* additional_xml ) { int retval; char _result_template[BLOB_SIZE]; char wu_template[BLOB_SIZE]; #if 0 retval = check_files(infiles, ninfiles, config_loc); if (retval) { fprintf(stderr, "Missing input file: %s\n", infiles[0]); return -1; } #endif strcpy(wu_template, _wu_template); wu.create_time = time(0); retval = process_input_template( wu, wu_template, infiles, ninfiles, config_loc, command_line, additional_xml ); if (retval) { fprintf(stderr, "process_input_template(): %d\n", retval); return retval; } retval = read_filename( result_template_filepath, _result_template, sizeof(_result_template) ); if (retval) { fprintf(stderr, "create_work: can't read result template file %s\n", result_template_filepath ); return retval; } if (strlen(result_template_filename) > sizeof(wu.result_template_file)-1) { fprintf(stderr, "result template filename is too big: %d bytes, max is %d\n", (int)strlen(result_template_filename), (int)sizeof(wu.result_template_file)-1 ); return ERR_BUFFER_OVERFLOW; } strlcpy(wu.result_template_file, result_template_filename, sizeof(wu.result_template_file)); if (wu.rsc_fpops_est == 0) { fprintf(stderr, "no rsc_fpops_est given; can't create job\n"); return ERR_NO_OPTION; } if (wu.rsc_fpops_bound == 0) { fprintf(stderr, "no rsc_fpops_bound given; can't create job\n"); return ERR_NO_OPTION; } if (wu.rsc_disk_bound == 0) { fprintf(stderr, "no rsc_disk_bound given; can't create job\n"); return ERR_NO_OPTION; } if (wu.target_nresults == 0) { fprintf(stderr, "no target_nresults given; can't create job\n"); return ERR_NO_OPTION; } if (wu.max_error_results == 0) { fprintf(stderr, "no max_error_results given; can't create job\n"); return ERR_NO_OPTION; } if (wu.max_total_results == 0) { fprintf(stderr, "no max_total_results 
given; can't create job\n"); return ERR_NO_OPTION; } if (wu.max_success_results == 0) { fprintf(stderr, "no max_success_results given; can't create job\n"); return ERR_NO_OPTION; } if (wu.max_success_results > wu.max_total_results) { fprintf(stderr, "max_success_results > max_total_results; can't create job\n"); return ERR_INVALID_PARAM; } if (wu.max_error_results > wu.max_total_results) { fprintf(stderr, "max_error_results > max_total_results; can't create job\n"); return ERR_INVALID_PARAM; } if (wu.target_nresults > wu.max_success_results) { fprintf(stderr, "target_nresults > max_success_results; can't create job\n"); return ERR_INVALID_PARAM; } if (wu.transitioner_flags & TRANSITION_NONE) { wu.transition_time = INT_MAX; } else { wu.transition_time = time(0); } if (wu.id) { retval = wu.update(); if (retval) { fprintf(stderr, "create_work: workunit.update() %d\n", retval); return retval; } } else { retval = wu.insert(); if (retval) { fprintf(stderr, "create_work: workunit.insert() %d\n", retval); return retval; } wu.id = boinc_db.insert_id(); } return 0; }
int count_workunits(int& n, const char* query) { DB_WORKUNIT workunit; return workunit.count(n, query); }
// return true if we changed the file_delete_state of a WU or a result // bool do_pass(bool retry_error) { DB_WORKUNIT wu; DB_RESULT result; bool did_something = false; char buf[256]; char clause[256]; int retval, new_state; check_stop_daemons(); strcpy(clause, ""); if (id_modulus) { sprintf(clause, " and id %% %d = %d ", id_modulus, id_remainder); } if (dont_delete_batches) { strcat(clause, " and batch <= 0 "); } if (appid) { sprintf(buf, " and appid = %d ", appid); strcat(clause, buf); } sprintf(buf, "where file_delete_state=%d %s limit %d", retry_error?FILE_DELETE_ERROR:FILE_DELETE_READY, clause, WUS_PER_ENUM ); while (do_input_files) { retval = wu.enumerate(buf); if (retval) { if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n"); exit(0); } break; } if (preserve_wu_files) { retval = 0; } else { retval = wu_delete_files(wu); } if (retval) { new_state = FILE_DELETE_ERROR; log_messages.printf(MSG_CRITICAL, "[WU#%d] file deletion failed: %s\n", wu.id, boincerror(retval) ); } else { new_state = FILE_DELETE_DONE; } if (new_state != wu.file_delete_state) { sprintf(buf, "file_delete_state=%d", new_state); retval = wu.update_field(buf); if (retval) { log_messages.printf(MSG_CRITICAL, "[WU#%d] update failed: %s\n", wu.id, boincerror(retval) ); } else { log_messages.printf(MSG_DEBUG, "[WU#%d] file_delete_state updated\n", wu.id ); did_something = true; } } } sprintf(buf, "where file_delete_state=%d %s limit %d", retry_error?FILE_DELETE_ERROR:FILE_DELETE_READY, clause, RESULTS_PER_ENUM ); while (do_output_files) { retval = result.enumerate(buf); if (retval) { if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n"); exit(0); } break; } if (preserve_result_files) { retval = 0; } else { retval = result_delete_files(result); } if (retval) { new_state = FILE_DELETE_ERROR; log_messages.printf(MSG_CRITICAL, "[RESULT#%d] file deletion failed: %s\n", result.id, boincerror(retval) ); } else { 
new_state = FILE_DELETE_DONE; } if (new_state != result.file_delete_state) { sprintf(buf, "file_delete_state=%d", new_state); retval = result.update_field(buf); if (retval) { log_messages.printf(MSG_CRITICAL, "[RESULT#%d] update failed: %s\n", result.id, boincerror(retval) ); } else { log_messages.printf(MSG_DEBUG, "[RESULT#%d] file_delete_state updated\n", result.id ); did_something = true; } } } return did_something; }
int main(int argc, const char** argv) { DB_APP app; DB_WORKUNIT wu; int retval; char wu_template[BLOB_SIZE]; char wu_template_file[256], result_template_file[256], result_template_path[1024]; const char* command_line=NULL; const char** infiles = NULL; int i, ninfiles; char download_dir[256], db_name[256], db_passwd[256]; char db_user[256],db_host[256]; char buf[256]; char additional_xml[256]; bool assign_flag = false; bool assign_multi = false; int assign_id = 0; int assign_type; strcpy(result_template_file, ""); strcpy(app.name, ""); strcpy(db_passwd, ""); strcpy(additional_xml, ""); const char* config_dir = 0; i = 1; ninfiles = 0; wu.clear(); // defaults (in case they're not in WU template) wu.id = 0; wu.min_quorum = 2; wu.target_nresults = 2; wu.max_error_results = 3; wu.max_total_results = 10; wu.max_success_results = 6; wu.rsc_fpops_est = 3600e9; wu.rsc_fpops_bound = 86400e9; wu.rsc_memory_bound = 5e8; wu.rsc_disk_bound = 1e9; wu.rsc_bandwidth_bound = 0.0; wu.delay_bound = 7*86400; while (i < argc) { if (arg(argv, i, "appname")) { strcpy(app.name, argv[++i]); } else if (arg(argv, i, "wu_name")) { strcpy(wu.name, argv[++i]); } else if (arg(argv, i, "wu_template")) { strcpy(wu_template_file, argv[++i]); } else if (arg(argv, i, "result_template")) { strcpy(result_template_file, argv[++i]); } else if (arg(argv, i, "batch")) { wu.batch = atoi(argv[++i]); } else if (arg(argv, i, "config_dir")) { config_dir = argv[++i]; } else if (arg(argv, i, "batch")) { wu.batch = atoi(argv[++i]); } else if (arg(argv, i, "priority")) { wu.priority = atoi(argv[++i]); } else if (arg(argv, i, "rsc_fpops_est")) { wu.rsc_fpops_est = atof(argv[++i]); } else if (arg(argv, i, "rsc_fpops_bound")) { wu.rsc_fpops_bound = atof(argv[++i]); } else if (arg(argv, i, "rsc_memory_bound")) { wu.rsc_memory_bound = atof(argv[++i]); } else if (arg(argv, i, "rsc_disk_bound")) { wu.rsc_disk_bound = atof(argv[++i]); } else if (arg(argv, i, "delay_bound")) { wu.delay_bound = atoi(argv[++i]); } else if 
(arg(argv, i, "min_quorum")) { wu.min_quorum = atoi(argv[++i]); } else if (arg(argv, i, "target_nresults")) { wu.target_nresults = atoi(argv[++i]); } else if (arg(argv, i, "max_error_results")) { wu.max_error_results = atoi(argv[++i]); } else if (arg(argv, i, "max_total_results")) { wu.max_total_results = atoi(argv[++i]); } else if (arg(argv, i, "max_success_results")) { wu.max_success_results = atoi(argv[++i]); } else if (arg(argv, i, "opaque")) { wu.opaque = atoi(argv[++i]); } else if (arg(argv, i, "command_line")) { command_line= argv[++i]; } else if (arg(argv, i, "additional_xml")) { strcpy(additional_xml, argv[++i]); } else if (arg(argv, i, "wu_id")) { wu.id = atoi(argv[++i]); } else if (arg(argv, i, "assign_all")) { assign_multi = true; assign_flag = true; assign_type = ASSIGN_NONE; } else if (arg(argv, i, "assign_host")) { assign_flag = true; assign_type = ASSIGN_HOST; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "assign_user_one")) { assign_flag = true; assign_type = ASSIGN_USER; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "assign_user_all")) { assign_flag = true; assign_type = ASSIGN_USER; assign_multi = true; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "assign_team_one")) { assign_flag = true; assign_type = ASSIGN_TEAM; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "assign_team_all")) { assign_flag = true; assign_type = ASSIGN_TEAM; assign_multi = true; assign_id = atoi(argv[++i]); } else { if (!strncmp("-", argv[i], 1)) { fprintf(stderr, "create_work: bad argument '%s'\n", argv[i]); exit(1); } infiles = argv+i; ninfiles = argc - i; break; } i++; } #define CHKARG(x,m) do { if (!(x)) { fprintf(stderr, "create_work: bad command line: "m"\n"); exit(1); } } while (0) #define CHKARG_STR(v,m) CHKARG(strlen(v),m) CHKARG_STR(app.name , "need --appname"); CHKARG_STR(wu.name , "need --wu_name"); CHKARG_STR(wu_template_file , "need --wu_template"); CHKARG_STR(result_template_file , "need --result_template"); #undef CHKARG #undef 
CHKARG_STR if (assign_flag) { if (!strstr(wu.name, ASSIGNED_WU_STR)) { fprintf(stderr, "Assigned WU names must contain '%s'\n", ASSIGNED_WU_STR ); exit(1); } } retval = config.parse_file(); if (retval) { fprintf(stderr, "Can't parse config file: %d\n", retval); exit(1); } else { strcpy(db_name, config.db_name); strcpy(db_passwd, config.db_passwd); strcpy(db_user, config.db_user); strcpy(db_host, config.db_host); strcpy(download_dir, config.download_dir); } retval = boinc_db.open(db_name, db_host, db_user, db_passwd); if (retval) { fprintf(stderr, "create_work: error opening database: %d\n", retval ); exit(1); } sprintf(buf, "where name='%s'", app.name); retval = app.lookup(buf); if (retval) { fprintf(stderr, "create_work: app not found\n"); exit(1); } retval = read_filename(wu_template_file, wu_template, sizeof(wu_template)); if (retval) { fprintf(stderr, "create_work: can't open WU template: %d\n", retval); exit(1); } wu.appid = app.id; strcpy(result_template_path, "./"); strcat(result_template_path, result_template_file); retval = create_work( wu, wu_template, result_template_file, result_template_path, const_cast<const char **>(infiles), ninfiles, config, command_line, additional_xml ); if (retval) { fprintf(stderr, "create_work: %d\n", retval); exit(1); } if (assign_flag) { DB_ASSIGNMENT assignment; assignment.clear(); assignment.create_time = time(0); assignment.target_id = assign_id; assignment.target_type = assign_type; assignment.multi = assign_multi; assignment.workunitid = wu.id; retval = assignment.insert(); if (retval) { fprintf(stderr, "assignment.insert() failed: %d\n", retval); exit(1); } } boinc_db.close(); }
// send non-multi assigned jobs // bool send_assigned_jobs() { DB_ASSIGNMENT asg; DB_RESULT result; DB_WORKUNIT wu; bool sent_something = false; int retval; // for now, only look for user assignments // char buf[256]; sprintf(buf, "where target_type=%d and target_id=%d and multi=0", ASSIGN_USER, g_reply->user.id ); while (!asg.enumerate(buf)) { if (!work_needed(false)) continue; // if the WU doesn't exist, delete the assignment record. // retval = wu.lookup_id(asg.workunitid); if (retval) { asg.delete_from_db(); continue; } // don't send if WU is validation pending or completed, // or has transition pending // if (wu.need_validate) continue; if (wu.canonical_resultid) continue; if (wu.transition_time < time(0)) continue; // don't send if we already sent one to this host // sprintf(buf, "where workunitid=%d and hostid=%d", asg.workunitid, g_request->host.id ); retval = result.lookup(buf); if (retval != ERR_DB_NOT_FOUND) continue; // don't send if there's already one in progress to this user // sprintf(buf, "where workunitid=%d and userid=%d and server_state=%d", asg.workunitid, g_reply->user.id, RESULT_SERVER_STATE_IN_PROGRESS ); retval = result.lookup(buf); if (retval != ERR_DB_NOT_FOUND) continue; // OK, send the job // retval = send_assigned_job(asg); if (retval) continue; sent_something = true; // update the WU's transition time to time out this job // retval = wu.lookup_id(asg.workunitid); if (retval) continue; int new_tt = time(0) + wu.delay_bound; if (new_tt < wu.transition_time) { char buf2[256]; sprintf(buf2, "transition_time=%d", new_tt); wu.update_field(buf2); } } return sent_something; }
int main(int argc, const char** argv) { DB_APP app; DB_WORKUNIT wu; int retval; char wu_template[BLOB_SIZE]; char wu_template_file[256], result_template_file[256], result_template_path[MAXPATHLEN]; const char* command_line=NULL; const char** infiles = NULL; int i, ninfiles; char download_dir[256], db_name[256], db_passwd[256]; char db_user[256],db_host[256]; char buf[256]; char additional_xml[256]; bool show_wu_name = true; bool assign_flag = false; bool assign_multi = false; int assign_id = 0; int assign_type = ASSIGN_NONE; strcpy(wu_template_file, ""); strcpy(result_template_file, ""); strcpy(app.name, ""); strcpy(db_passwd, ""); strcpy(additional_xml, ""); const char* config_dir = 0; i = 1; ninfiles = 0; wu.clear(); // defaults (in case they're not in WU template) wu.id = 0; wu.min_quorum = 2; wu.target_nresults = 2; wu.max_error_results = 3; wu.max_total_results = 10; wu.max_success_results = 6; wu.rsc_fpops_est = 3600e9; wu.rsc_fpops_bound = 86400e9; wu.rsc_memory_bound = 5e8; wu.rsc_disk_bound = 1e9; wu.rsc_bandwidth_bound = 0.0; wu.delay_bound = 7*86400; while (i < argc) { if (arg(argv, i, "appname")) { strcpy(app.name, argv[++i]); } else if (arg(argv, i, "d")) { int dl = atoi(argv[++i]); log_messages.set_debug_level(dl); if (dl ==4) g_print_queries = true; } else if (arg(argv, i, "wu_name")) { show_wu_name = false; strcpy(wu.name, argv[++i]); } else if (arg(argv, i, "wu_template")) { strcpy(wu_template_file, argv[++i]); } else if (arg(argv, i, "result_template")) { strcpy(result_template_file, argv[++i]); } else if (arg(argv, i, "batch")) { wu.batch = atoi(argv[++i]); } else if (arg(argv, i, "config_dir")) { config_dir = argv[++i]; } else if (arg(argv, i, "batch")) { wu.batch = atoi(argv[++i]); } else if (arg(argv, i, "priority")) { wu.priority = atoi(argv[++i]); } else if (arg(argv, i, "rsc_fpops_est")) { wu.rsc_fpops_est = atof(argv[++i]); } else if (arg(argv, i, "rsc_fpops_bound")) { wu.rsc_fpops_bound = atof(argv[++i]); } else if (arg(argv, i, 
"rsc_memory_bound")) { wu.rsc_memory_bound = atof(argv[++i]); } else if (arg(argv, i, "rsc_disk_bound")) { wu.rsc_disk_bound = atof(argv[++i]); } else if (arg(argv, i, "delay_bound")) { wu.delay_bound = atoi(argv[++i]); } else if (arg(argv, i, "min_quorum")) { wu.min_quorum = atoi(argv[++i]); } else if (arg(argv, i, "target_nresults")) { wu.target_nresults = atoi(argv[++i]); } else if (arg(argv, i, "max_error_results")) { wu.max_error_results = atoi(argv[++i]); } else if (arg(argv, i, "max_total_results")) { wu.max_total_results = atoi(argv[++i]); } else if (arg(argv, i, "max_success_results")) { wu.max_success_results = atoi(argv[++i]); } else if (arg(argv, i, "opaque")) { wu.opaque = atoi(argv[++i]); } else if (arg(argv, i, "command_line")) { command_line= argv[++i]; } else if (arg(argv, i, "additional_xml")) { strcpy(additional_xml, argv[++i]); } else if (arg(argv, i, "wu_id")) { wu.id = atoi(argv[++i]); } else if (arg(argv, i, "broadcast")) { assign_multi = true; assign_flag = true; assign_type = ASSIGN_NONE; } else if (arg(argv, i, "broadcast_user")) { assign_flag = true; assign_type = ASSIGN_USER; assign_multi = true; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "broadcast_team")) { assign_flag = true; assign_type = ASSIGN_TEAM; assign_multi = true; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "target_host")) { assign_flag = true; assign_type = ASSIGN_HOST; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "target_user")) { assign_flag = true; assign_type = ASSIGN_USER; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "target_team")) { assign_flag = true; assign_type = ASSIGN_TEAM; assign_id = atoi(argv[++i]); } else if (arg(argv, i, "help")) { usage(); exit(0); } else { if (!strncmp("-", argv[i], 1)) { fprintf(stderr, "create_work: bad argument '%s'\n", argv[i]); exit(1); } infiles = argv+i; ninfiles = argc - i; break; } i++; } if (!strlen(app.name)) { usage(); } if (!strlen(wu.name)) { sprintf(wu.name, "%s_%d_%f", app.name, getpid(), 
dtime()); } if (!strlen(wu_template_file)) { sprintf(wu_template_file, "templates/%s_in", app.name); } if (!strlen(result_template_file)) { sprintf(result_template_file, "templates/%s_out", app.name); } retval = config.parse_file(config_dir); if (retval) { fprintf(stderr, "Can't parse config file: %s\n", boincerror(retval)); exit(1); } else { strcpy(db_name, config.db_name); strcpy(db_passwd, config.db_passwd); strcpy(db_user, config.db_user); strcpy(db_host, config.db_host); strcpy(download_dir, config.download_dir); } retval = boinc_db.open(db_name, db_host, db_user, db_passwd); if (retval) { fprintf(stderr, "create_work: error opening database: %s\n", boincerror(retval) ); exit(1); } sprintf(buf, "where name='%s'", app.name); retval = app.lookup(buf); if (retval) { fprintf(stderr, "create_work: app not found\n"); exit(1); } retval = read_filename(wu_template_file, wu_template, sizeof(wu_template)); if (retval) { fprintf(stderr, "create_work: can't open input template %s\n", wu_template_file ); exit(1); } wu.appid = app.id; strcpy(result_template_path, "./"); strcat(result_template_path, result_template_file); retval = create_work( wu, wu_template, result_template_file, result_template_path, const_cast<const char **>(infiles), ninfiles, config, command_line, additional_xml ); if (retval) { fprintf(stderr, "create_work: %s\n", boincerror(retval)); exit(1); } else { if (show_wu_name) { printf("workunit name: %s\n", wu.name); } } if (assign_flag) { DB_ASSIGNMENT assignment; assignment.clear(); assignment.create_time = time(0); assignment.target_id = assign_id; assignment.target_type = assign_type; assignment.multi = assign_multi; assignment.workunitid = wu.id; retval = assignment.insert(); if (retval) { fprintf(stderr, "assignment.insert() failed: %s\n", boincerror(retval) ); exit(1); } sprintf(buf, "transitioner_flags=%d", assign_multi?TRANSITION_NONE:TRANSITION_NO_NEW_RESULTS ); retval = wu.update_field(buf); if (retval) { fprintf(stderr, "wu.update() failed: 
%s\n", boincerror(retval)); exit(1); } } boinc_db.close(); }
// assimilate all WUs that need it // return nonzero (true) if did anything // bool do_pass(APP& app) { DB_WORKUNIT wu; DB_RESULT canonical_result, result; bool did_something = false; char buf[256]; char mod_clause[256]; int retval; int num_assimilated=0; check_stop_daemons(); if (wu_id_modulus) { sprintf(mod_clause, " and workunit.id %% %d = %d ", wu_id_modulus, wu_id_remainder ); } else { strcpy(mod_clause, ""); } sprintf(buf, "where appid=%d and assimilate_state=%d %s limit %d", app.id, ASSIMILATE_READY, mod_clause, one_pass_N_WU ? one_pass_N_WU : 1000 ); while (1) { retval = wu.enumerate(buf); if (retval) { if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n" ); exit(0); } break; } vector<RESULT> results; // must be inside while()! // for testing purposes, pretend we did nothing // if (update_db) { did_something = true; } log_messages.printf(MSG_DEBUG, "[%s] assimilating WU %d; state=%d\n", wu.name, wu.id, wu.assimilate_state ); sprintf(buf, "where workunitid=%d", wu.id); canonical_result.clear(); bool found = false; while (1) { retval = result.enumerate(buf); if (retval) { if (retval != ERR_DB_NOT_FOUND) { log_messages.printf(MSG_DEBUG, "DB connection lost, exiting\n" ); exit(0); } break; } results.push_back(result); if (result.id == wu.canonical_resultid) { canonical_result = result; found = true; } } // If no canonical result found and WU had no other errors, // something is wrong, e.g. result records got deleted prematurely. // This is probably unrecoverable, so mark the WU as having // an assimilation error and keep going. 
// if (!found && !wu.error_mask) { log_messages.printf(MSG_CRITICAL, "[%s] no canonical result\n", wu.name ); wu.error_mask = WU_ERROR_NO_CANONICAL_RESULT; sprintf(buf, "error_mask=%d", wu.error_mask); wu.update_field(buf); } retval = assimilate_handler(wu, results, canonical_result); if (retval && retval != DEFER_ASSIMILATION) { log_messages.printf(MSG_CRITICAL, "[%s] handler error: %s; exiting\n", wu.name, boincerror(retval) ); exit(retval); } if (update_db) { // Defer assimilation until next result is returned int assimilate_state = ASSIMILATE_DONE; if (retval == DEFER_ASSIMILATION) { assimilate_state = ASSIMILATE_INIT; } sprintf( buf, "assimilate_state=%d, transition_time=%d", assimilate_state, (int)time(0) ); retval = wu.update_field(buf); if (retval) { log_messages.printf(MSG_CRITICAL, "[%s] update failed: %s\n", wu.name, boincerror(retval) ); exit(1); } } num_assimilated++; } if (did_something) { boinc_db.commit_transaction(); } if (num_assimilated) { log_messages.printf(MSG_NORMAL, "Assimilated %d workunits.\n", num_assimilated ); } return did_something; }
// variant where input files are described by INFILE_DESCS, // so you can have remote files etc. // // If query_string is present, don't actually create the job; // instead, append to the query string. // The caller is responsible for doing the query. // int create_work2( DB_WORKUNIT& wu, const char* _wu_template, const char* result_template_filename, // relative to project root; stored in DB const char* /* result_template_filepath*/, // deprecated vector<INFILE_DESC> &infiles, SCHED_CONFIG& config_loc, const char* command_line, const char* additional_xml, char* query_string ) { int retval; char wu_template[BLOB_SIZE]; #if 0 retval = check_files(infiles, ninfiles, config_loc); if (retval) { fprintf(stderr, "Missing input file: %s\n", infiles[0]); return -1; } #endif safe_strcpy(wu_template, _wu_template); wu.create_time = time(0); retval = process_input_template( wu, wu_template, infiles, config_loc, command_line, additional_xml ); if (retval) { fprintf(stderr, "process_input_template(): %s\n", boincerror(retval)); return retval; } // check for presence of result template. // we don't need to actually look at it. 
// const char* p = config_loc.project_path(result_template_filename); if (!boinc_file_exists(p)) { fprintf(stderr, "create_work: result template file %s doesn't exist\n", p ); return retval; } if (strlen(result_template_filename) > sizeof(wu.result_template_file)-1) { fprintf(stderr, "result template filename is too big: %d bytes, max is %d\n", (int)strlen(result_template_filename), (int)sizeof(wu.result_template_file)-1 ); return ERR_BUFFER_OVERFLOW; } strlcpy(wu.result_template_file, result_template_filename, sizeof(wu.result_template_file)); if (wu.rsc_fpops_est == 0) { fprintf(stderr, "no rsc_fpops_est given; can't create job\n"); return ERR_NO_OPTION; } if (wu.rsc_fpops_bound == 0) { fprintf(stderr, "no rsc_fpops_bound given; can't create job\n"); return ERR_NO_OPTION; } if (wu.rsc_disk_bound == 0) { fprintf(stderr, "no rsc_disk_bound given; can't create job\n"); return ERR_NO_OPTION; } if (wu.target_nresults == 0) { fprintf(stderr, "no target_nresults given; can't create job\n"); return ERR_NO_OPTION; } if (wu.max_error_results == 0) { fprintf(stderr, "no max_error_results given; can't create job\n"); return ERR_NO_OPTION; } if (wu.max_total_results == 0) { fprintf(stderr, "no max_total_results given; can't create job\n"); return ERR_NO_OPTION; } if (wu.max_success_results == 0) { fprintf(stderr, "no max_success_results given; can't create job\n"); return ERR_NO_OPTION; } if (wu.max_success_results > wu.max_total_results) { fprintf(stderr, "max_success_results > max_total_results; can't create job\n"); return ERR_INVALID_PARAM; } if (wu.max_error_results > wu.max_total_results) { fprintf(stderr, "max_error_results > max_total_results; can't create job\n"); return ERR_INVALID_PARAM; } if (wu.target_nresults > wu.max_success_results) { fprintf(stderr, "target_nresults > max_success_results; can't create job\n"); return ERR_INVALID_PARAM; } if (wu.transitioner_flags) { wu.transition_time = INT_MAX; } else { wu.transition_time = time(0); } if (query_string) { 
wu.db_print_values(query_string); } else if (wu.id) { retval = wu.update(); if (retval) { fprintf(stderr, "create_work: workunit.update() %s\n", boincerror(retval) ); return retval; } } else { retval = wu.insert(); if (retval) { fprintf(stderr, "create_work: workunit.insert() %s\n", boincerror(retval) ); return retval; } wu.id = boinc_db.insert_id(); } return 0; }
int main(int argc, char** argv) { DB_APP app; int retval; int i; char download_dir[256], db_name[256], db_passwd[256]; char db_user[256],db_host[256]; char buf[4096]; JOB_DESC jd; bool show_wu_name = true; bool use_stdin = false; strcpy(app.name, ""); strcpy(db_passwd, ""); const char* config_dir = 0; i = 1; while (i < argc) { if (arg(argv, i, "appname")) { safe_strcpy(app.name, argv[++i]); } else if (arg(argv, i, "d")) { int dl = atoi(argv[++i]); log_messages.set_debug_level(dl); if (dl ==4) g_print_queries = true; } else if (arg(argv, i, "wu_name")) { show_wu_name = false; safe_strcpy(jd.wu.name, argv[++i]); } else if (arg(argv, i, "wu_template")) { safe_strcpy(jd.wu_template_file, argv[++i]); } else if (arg(argv, i, "result_template")) { safe_strcpy(jd.result_template_file, argv[++i]); } else if (arg(argv, i, "config_dir")) { config_dir = argv[++i]; } else if (arg(argv, i, "batch")) { jd.wu.batch = atoi(argv[++i]); } else if (arg(argv, i, "priority")) { jd.wu.priority = atoi(argv[++i]); } else if (arg(argv, i, "credit")) { jd.wu.canonical_credit = atof(argv[++i]); } else if (arg(argv, i, "rsc_fpops_est")) { jd.wu.rsc_fpops_est = atof(argv[++i]); } else if (arg(argv, i, "rsc_fpops_bound")) { jd.wu.rsc_fpops_bound = atof(argv[++i]); } else if (arg(argv, i, "rsc_memory_bound")) { jd.wu.rsc_memory_bound = atof(argv[++i]); } else if (arg(argv, i, "size_class")) { jd.wu.size_class = atoi(argv[++i]); } else if (arg(argv, i, "app_version_num")) { jd.wu.app_version_num = atoi(argv[++i]); } else if (arg(argv, i, "rsc_disk_bound")) { jd.wu.rsc_disk_bound = atof(argv[++i]); } else if (arg(argv, i, "delay_bound")) { jd.wu.delay_bound = atoi(argv[++i]); } else if (arg(argv, i, "hr_class")) { jd.wu.hr_class = atoi(argv[++i]); } else if (arg(argv, i, "min_quorum")) { jd.wu.min_quorum = atoi(argv[++i]); } else if (arg(argv, i, "target_nresults")) { jd.wu.target_nresults = atoi(argv[++i]); } else if (arg(argv, i, "max_error_results")) { jd.wu.max_error_results = atoi(argv[++i]); 
} else if (arg(argv, i, "max_total_results")) { jd.wu.max_total_results = atoi(argv[++i]); } else if (arg(argv, i, "max_success_results")) { jd.wu.max_success_results = atoi(argv[++i]); } else if (arg(argv, i, "opaque")) { jd.wu.opaque = atoi(argv[++i]); } else if (arg(argv, i, "command_line")) { jd.command_line= argv[++i]; } else if (arg(argv, i, "wu_id")) { jd.wu.id = atoi(argv[++i]); } else if (arg(argv, i, "broadcast")) { jd.assign_multi = true; jd.assign_flag = true; jd.assign_type = ASSIGN_NONE; } else if (arg(argv, i, "broadcast_user")) { jd.assign_flag = true; jd.assign_type = ASSIGN_USER; jd.assign_multi = true; jd.assign_id = atoi(argv[++i]); check_assign_id(jd.assign_id); } else if (arg(argv, i, "broadcast_team")) { jd.assign_flag = true; jd.assign_type = ASSIGN_TEAM; jd.assign_multi = true; jd.assign_id = atoi(argv[++i]); check_assign_id(jd.assign_id); } else if (arg(argv, i, "target_host")) { jd.assign_flag = true; jd.assign_type = ASSIGN_HOST; jd.assign_id = atoi(argv[++i]); check_assign_id(jd.assign_id); } else if (arg(argv, i, "target_user")) { jd.assign_flag = true; jd.assign_type = ASSIGN_USER; jd.assign_id = atoi(argv[++i]); check_assign_id(jd.assign_id); } else if (arg(argv, i, "target_team")) { jd.assign_flag = true; jd.assign_type = ASSIGN_TEAM; jd.assign_id = atoi(argv[++i]); check_assign_id(jd.assign_id); } else if (arg(argv, i, "help")) { usage(); exit(0); } else if (arg(argv, i, "stdin")) { use_stdin = true; } else if (arg(argv, i, (char*)"remote_file")) { INFILE_DESC id; id.is_remote = true; safe_strcpy(id.url, argv[++i]); id.nbytes = atof(argv[++i]); safe_strcpy(id.md5, argv[++i]); jd.infiles.push_back(id); } else if (arg(argv, i, "verbose")) { verbose = true; } else if (arg(argv, i, "continue_on_error")) { continue_on_error = true; } else if (arg(argv, i, "keywords")) { strcpy(jd.wu.keywords, argv[++i]); } else { if (!strncmp("-", argv[i], 1)) { fprintf(stderr, "create_work: bad argument '%s'\n", argv[i]); exit(1); } INFILE_DESC id; 
id.is_remote = false; safe_strcpy(id.name, argv[i]); jd.infiles.push_back(id); } i++; } if (!strlen(app.name)) { usage(); } if (!strlen(jd.wu.name)) { sprintf(jd.wu.name, "%s_%d_%f", app.name, getpid(), dtime()); } if (!strlen(jd.wu_template_file)) { sprintf(jd.wu_template_file, "templates/%s_in", app.name); } if (!strlen(jd.result_template_file)) { sprintf(jd.result_template_file, "templates/%s_out", app.name); } retval = config.parse_file(config_dir); if (retval) { fprintf(stderr, "Can't parse config file: %s\n", boincerror(retval)); exit(1); } else { strcpy(db_name, config.db_name); strcpy(db_passwd, config.db_passwd); strcpy(db_user, config.db_user); strcpy(db_host, config.db_host); strcpy(download_dir, config.download_dir); } retval = boinc_db.open(db_name, db_host, db_user, db_passwd); if (retval) { fprintf(stderr, "create_work: error opening database: %s\n", boincerror(retval) ); exit(1); } boinc_db.set_isolation_level(READ_UNCOMMITTED); sprintf(buf, "where name='%s'", app.name); retval = app.lookup(buf); if (retval) { fprintf(stderr, "create_work: app not found\n"); exit(1); } // read the WU template file. // this won't get used if we're creating a batch // with job-level WU templates // if (boinc_file_exists(jd.wu_template_file)) { retval = read_filename( jd.wu_template_file, jd.wu_template, sizeof(jd.wu_template) ); if (retval) { fprintf(stderr, "create_work: can't open input template %s\n", jd.wu_template_file ); exit(1); } } jd.wu.appid = app.id; strcpy(jd.result_template_path, "./"); strcat(jd.result_template_path, jd.result_template_file); if (use_stdin) { // clear the WU template name so we'll recognize a job-level one // strcpy(jd.wu_template_file, ""); if (jd.assign_flag) { // if we're doing assignment we can't use the bulk-query method; // create the jobs one at a time. 
// int _argc; char* _argv[100]; for (int j=0; ; j++) { char* p = fgets(buf, sizeof(buf), stdin); if (p == NULL) break; JOB_DESC jd2 = jd; strcpy(jd2.wu.name, ""); _argc = parse_command_line(buf, _argv); jd2.parse_cmdline(_argc, _argv); if (!strlen(jd2.wu.name)) { sprintf(jd2.wu.name, "%s_%d", jd.wu.name, j); } if (strlen(jd2.wu_template_file)) { get_wu_template(jd2); } if (!strlen(jd2.wu_template)) { fprintf(stderr, "job is missing input template\n"); exit(1); } jd2.create(); } } else { string values; DB_WORKUNIT wu; int _argc; char* _argv[100], value_buf[MAX_QUERY_LEN]; for (int j=0; ; j++) { char* p = fgets(buf, sizeof(buf), stdin); if (p == NULL) break; JOB_DESC jd2 = jd; strcpy(jd2.wu.name, ""); _argc = parse_command_line(buf, _argv); jd2.parse_cmdline(_argc, _argv); if (!strlen(jd2.wu.name)) { sprintf(jd2.wu.name, "%s_%d", jd.wu.name, j); } // if the stdin line specified assignment, // create the job individually // if (jd2.assign_flag) { jd2.create(); continue; } // otherwise accumulate a SQL query so that we can // create jobs en masse // if (strlen(jd2.wu_template_file)) { get_wu_template(jd2); } if (!strlen(jd2.wu_template)) { fprintf(stderr, "job is missing input template\n"); exit(1); } retval = create_work2( jd2.wu, jd2.wu_template, jd2.result_template_file, jd2.result_template_path, jd2.infiles, config, jd2.command_line, NULL, value_buf ); if (retval) { fprintf(stderr, "create_work() failed: %d\n", retval); if (continue_on_error) { continue; } else { exit(1); } } if (values.size()) { values += ","; values += value_buf; } else { values = value_buf; } // MySQL can handles queries at least 1 MB // int n = strlen(value_buf); if (values.size() + 2*n > 1000000) { retval = wu.insert_batch(values); if (retval) { fprintf(stderr, "wu.insert_batch() failed: %d; size %d\n", retval, (int)values.size() ); fprintf(stderr, "MySQL error: %s\n", boinc_db.error_string() ); exit(1); } values.clear(); } } if (values.size()) { retval = wu.insert_batch(values); if (retval) { 
fprintf(stderr, "wu.insert_batch() failed: %d\n", retval ); fprintf(stderr, "MySQL error: %s\n", boinc_db.error_string() ); exit(1); } } } } else { jd.create(); if (show_wu_name) { printf("workunit name: %s\n", jd.wu.name); } } boinc_db.close(); }
// resend any jobs that: // 1) we already sent to this host; // 2) are still in progress (i.e. haven't timed out) and // 3) aren't present on the host // Return true if there were any such jobs // bool resend_lost_work() { SCHED_DB_RESULT result; std::vector<DB_RESULT>results; unsigned int i; char buf[256]; char warning_msg[256]; bool did_any = false; int num_eligible_to_resend=0; int num_resent=0; BEST_APP_VERSION* bavp = NULL; APP* app = NULL; int retval; sprintf(buf, " where hostid=%d and server_state=%d ", g_reply->host.id, RESULT_SERVER_STATE_IN_PROGRESS ); while (!result.enumerate(buf)) { if (!work_needed(false)) { result.end_enumerate(); break; } bool found = false; for (i=0; i<g_request->other_results.size(); i++) { OTHER_RESULT& orp = g_request->other_results[i]; if (!strcmp(orp.name, result.name)) { found = true; break; } } if (found) continue; num_eligible_to_resend++; if (config.debug_resend) { log_messages.printf(MSG_NORMAL, "[resend] [HOST#%d] found lost [RESULT#%u]: %s\n", g_reply->host.id, result.id, result.name ); } DB_WORKUNIT wu; bool can_resend = true; retval = wu.lookup_id(result.workunitid); if (retval) { log_messages.printf(MSG_CRITICAL, "[HOST#%d] can't resend - WU not found for [RESULT#%u]\n", g_reply->host.id, result.id ); can_resend = false; } if (can_resend) { app = ssp->lookup_app(wu.appid); bavp = get_app_version(wu, true, false); if (!bavp) { if (config.debug_resend) { log_messages.printf(MSG_NORMAL, "[HOST#%d] can't resend [RESULT#%u]: no app version for %s\n", g_reply->host.id, result.id, app->name ); } can_resend = false; } } if (can_resend && wu.error_mask) { if (config.debug_resend) { log_messages.printf(MSG_NORMAL, "[resend] skipping [RESULT#%u]: WU error mask %d\n", result.id, wu.error_mask ); } can_resend = false; } if (can_resend && wu.canonical_resultid) { if (config.debug_resend) { log_messages.printf(MSG_NORMAL, "[resend] skipping [RESULT#%u]: already have canonical result\n", result.id ); } can_resend = false; } if 
(can_resend && wu_is_infeasible_fast( wu, result.server_state, result.priority, result.report_deadline, *app, *bavp )) { if (config.debug_resend) { log_messages.printf(MSG_NORMAL, "[resend] skipping [RESULT#%u]: feasibility check failed\n", result.id ); } can_resend = false; } if (can_resend && possibly_give_result_new_deadline(result, wu, *bavp)) { if (config.debug_resend) { log_messages.printf(MSG_NORMAL, "[resend] skipping [RESULT#%u]: deadline assignment failed\n", result.id ); } can_resend = false; } // If we can't resend this job for any of the above reasons, // make it time out so that the transitioner does the right thing. // if (!can_resend) { result.report_deadline = time(0)-1; retval = result.mark_as_sent(result.server_state, config.report_grace_period); if (retval) { log_messages.printf(MSG_CRITICAL, "resend_lost_work: can't update result deadline: %s\n", boincerror(retval) ); continue; } retval = update_wu_on_send( wu, result.report_deadline + config.report_grace_period, *app, *bavp ); if (retval) { log_messages.printf(MSG_CRITICAL, "resend_lost_result: can't update WU transition time: %s\n", boincerror(retval) ); continue; } sprintf(warning_msg, "Didn't resend lost task %s (expired)", result.name ); g_reply->insert_message(warning_msg, "low"); } else { retval = add_result_to_reply(result, wu, bavp, false); if (retval) { log_messages.printf(MSG_CRITICAL, "[HOST#%d] failed to send [RESULT#%u]\n", g_reply->host.id, result.id ); continue; } sprintf(warning_msg, "Resent lost task %s", result.name); g_reply->insert_message(warning_msg, "low"); num_resent++; did_any = true; if (g_wreq->njobs_sent >= config.max_wus_to_send) { result.end_enumerate(); break; } } } if (num_eligible_to_resend && config.debug_resend) { log_messages.printf(MSG_NORMAL, "[resend] [HOST#%d] %d lost results, resent %d\n", g_reply->host.id, num_eligible_to_resend, num_resent ); } return did_any; }