// Find a file with work, and send. // This is guaranteed to send work if ANY is available for this user. // However, it ignores the working set, // and should be done only if we fail to send work from the working set. // // logic: // min_resultname = "" // loop // R = first unsent result where filename>min_resultname order by filename // // order by filename implies order by ID // send_results_for_file(R.filename) // // this skips disqualified results // min_resultname = R.filename; // static int send_new_file_work_deterministic_seeded( int& nsent, const char *start_f, const char *end_f ) { SCHED_DB_RESULT result; char filename[256], min_resultname[256], query[1024]; int retval; if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_new_file_work_deterministic_seeded() start=%s end=%s\n", start_f, end_f?end_f:"+Inf" ); } strcpy(min_resultname, start_f); while (1) { // are we done with the search yet? if (end_f && strcmp(min_resultname, end_f)>=0) break; #if 0 // an alternative here is to add ANOTHER index on name, server_state // to the result table. sprintf(query, "INNER JOIN (SELECT id FROM result WHERE server_state=%d and name>'%s' order by name limit 1) AS single USING (id)", RESULT_SERVER_STATE_UNSENT, min_resultname ); #endif sprintf(query, "INNER JOIN (SELECT id FROM result WHERE name>'%s' order by name limit 1) AS single USING (id)", min_resultname ); retval = result.lookup(query); if (retval) break; // no more unsent results or at the end of the filenames, return -1 retval = extract_filename(result.name, filename); if (retval) return retval; // not locality scheduled, now what??? if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_new_file_work_deterministic will try filename %s\n", filename ); } retval = send_results_for_file(filename, nsent, false); if (retval==ERR_NO_APP_VERSION || retval==ERR_INSUFFICIENT_RESOURCE) return retval; if (nsent>0 || !work_needed(true)) break; // construct a name which is lexically greater than the name of any result // which uses this file. sprintf(min_resultname, "%s__~", filename); } return 0; }
static int send_new_file_work_working_set() { char filename[256]; int retval, nsent; retval = get_working_set_filename(filename, is_host_slow()); if (retval) return retval; if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_new_file_working_set will try filename %s\n", filename ); } return send_results_for_file(filename, nsent, true); }
// DAVID, this is missing a return value! Am I right that this will // also eventually move 'non locality' work through and out of the // system? // // This looks for work created in the range t_min < t < t_max. Use // t_min=INT_MIN if you wish to leave off the left constraint. // static int send_old_work(int t_min, int t_max) { char buf[1024], filename[256]; int retval, extract_retval, nsent; SCHED_DB_RESULT result; int now=time(0); if (!work_needed(true)) { return 0; } // restrict values to full hours; // this allows the DB to cache query results in some cases // t_max = (t_max/3600)*3600; boinc_db.start_transaction(); // Note: the following queries look convoluted. // But apparently the simpler versions (without the inner join) // are a lot slower. // if (t_min != INT_MIN) { sprintf(buf, "INNER JOIN (SELECT id FROM result WHERE server_state=%d and %d<create_time and create_time<%d limit 1) AS single USING (id)", RESULT_SERVER_STATE_UNSENT, t_min, t_max ); } else { sprintf(buf, "INNER JOIN (SELECT id FROM result WHERE server_state=%d and create_time<%d limit 1) AS single USING (id)", RESULT_SERVER_STATE_UNSENT, t_max ); } retval = result.lookup(buf); if (!retval) { retval = possibly_send_result(result); boinc_db.commit_transaction(); if (!retval) { double age=(now-result.create_time)/3600.0; if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_old_work(%s) sent result created %.1f hours ago [RESULT#%d]\n", result.name, age, result.id ); } extract_retval=extract_filename(result.name, filename); if (!extract_retval) { send_results_for_file(filename, nsent, false); } else { // David, is this right? Is this the only place in // the locality scheduler that non-locality work // // gets done? if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] Note: sent NON-LOCALITY result %s\n", result.name ); } } } else if (retval == ERR_NO_APP_VERSION || retval==ERR_INSUFFICIENT_RESOURCE) { // if no app version found or no resources, give up completely! return retval; } } else { boinc_db.commit_transaction(); } if (retval) { double older=(now-t_max)/3600.0; if (t_min != INT_MIN) { double young=(now-t_min)/3600.0; if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_old_work() no feasible result younger than %.1f hours and older than %.1f hours\n", young, older ); } } else { if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_old_work() no feasible result older than %.1f hours\n", older ); } } } // DAVID, YOU CHANGED THIS FROM VOID TO INT. IS THIS THE RIGHT // RETURN VAL? You should probably use the return value from // sent_results_for_file as well. return retval; }
void send_work_locality() { int i, nsent, nfiles, j; // seed the random number generator unsigned int seed=time(0)+getpid(); srand(seed); #ifdef EINSTEIN_AT_HOME std::vector<FILE_INFO> eah_copy = g_request->file_infos; g_request->file_infos.clear(); g_request->files_not_needed.clear(); nfiles = (int) eah_copy.size(); for (i=0; i<nfiles; i++) { char *fname = eah_copy[i].name; if (is_workunit_file(fname)) { // these are files that we will use for locality scheduling and // to search for work // g_request->file_infos.push_back(eah_copy[i]); } else if (is_sticky_file(fname)) { // was if(!data_files) // these files MIGHT be deleted from host if we need to make // disk space there // g_request->file_delete_candidates.push_back(eah_copy[i]); if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] [HOST#%d] removing file %s from file_infos list\n", g_reply->host.id, fname ); } } else { // these files WILL be deleted from the host // g_request->files_not_needed.push_back(eah_copy[i]); if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] [HOST#%d] adding file %s to files_not_needed list\n", g_reply->host.id, fname ); } } } #endif // EINSTEIN_AT_HOME nfiles = (int) g_request->file_infos.size(); for (i=0; i<nfiles; i++) if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] [HOST#%d] has file %s\n", g_reply->host.id, g_request->file_infos[i].name ); } // Look for work in order of increasing file name, or randomly? // if (config.locality_scheduling_sorted_order) { sort(g_request->file_infos.begin(), g_request->file_infos.end(), file_info_order); j = 0; } else { if (!nfiles) nfiles = 1; j = rand()%nfiles; } // send old work if there is any. send this only to hosts which have // high-bandwidth connections, since asking dial-up users to upload // (presumably large) data files is onerous. // if (config.locality_scheduling_send_timeout && g_request->host.n_bwdown>100000) { int until=time(0)-config.locality_scheduling_send_timeout; int retval_sow=send_old_work(INT_MIN, until); if (retval_sow==ERR_NO_APP_VERSION || retval_sow==ERR_INSUFFICIENT_RESOURCE) return; } // send work for existing files // for (i=0; i<(int)g_request->file_infos.size(); i++) { int k = (i+j)%nfiles; int retval_srff; if (!work_needed(true)) break; FILE_INFO& fi = g_request->file_infos[k]; retval_srff=send_results_for_file( fi.name, nsent, false ); if (retval_srff==ERR_NO_APP_VERSION || retval_srff==ERR_INSUFFICIENT_RESOURCE) return; // if we couldn't send any work for this file, and we STILL need work, // then it must be that there was no additional work remaining for this // file which is feasible for this host. In this case, delete the file. // If the work was not sent for other (dynamic) reason such as insufficient // cpu, then DON'T delete the file. // if (nsent == 0 && work_needed(true) && config.file_deletion_strategy == 1) { g_reply->file_deletes.push_back(fi); if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] [HOST#%d]: delete file %s (not needed)\n", g_reply->host.id, fi.name ); } #ifdef EINSTEIN_AT_HOME // For name matching pattern h1_XXXX.XX_S5R4 // generate corresponding l1_XXXX.XX_S5R4 and *_S5R7 patterns and delete it also // if (strlen(fi.name)==15 && !strncmp("h1_", fi.name, 3)) { FILE_INFO fil4,fil7,fih7; fil4=fi; fil4.name[0]='l'; fil7=fil4; fil7.name[14]='7'; fih7=fi; fih7.name[14]='7'; g_reply->file_deletes.push_back(fil4); g_reply->file_deletes.push_back(fil7); g_reply->file_deletes.push_back(fih7); if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] [HOST#%d]: delete files %s,%s,%s (not needed)\n", g_reply->host.id, fil4.name,fil7.name,fih7.name ); } } #endif } // nsent==0 } // loop over files already on the host // send new files if needed // if (work_needed(true)) { send_new_file_work(); } }
// DAVID, this is missing a return value! Am I right that this will // also eventually move 'non locality' work through and out of the // system? // // This looks for work created in the range t_min < t < t_max. Use // t_min=INT_MIN if you wish to leave off the left constraint. // static int send_old_work(int t_min, int t_max) { char buf[1024], filename[256]; int retval, extract_retval, nsent; DB_RESULT result; int now=time(0); if (!work_needed(true)) { return 0; } boinc_db.start_transaction(); if (t_min != INT_MIN) { sprintf(buf, "where server_state=%d and %d<create_time and create_time<%d limit 1", RESULT_SERVER_STATE_UNSENT, t_min, t_max ); } else { sprintf(buf, "where server_state=%d and create_time<%d limit 1", RESULT_SERVER_STATE_UNSENT, t_max ); } retval = result.lookup(buf); if (!retval) { retval = possibly_send_result(result); boinc_db.commit_transaction(); if (!retval) { double age=(now-result.create_time)/3600.0; if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_old_work(%s) sent result created %.1f hours ago [RESULT#%d]\n", result.name, age, result.id ); } extract_retval=extract_filename(result.name, filename); if (!extract_retval) { send_results_for_file(filename, nsent, false); } else { // David, is this right? Is this the only place in // the locality scheduler that non-locality work // // gets done? if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] Note: sent NON-LOCALITY result %s\n", result.name ); } } } else if (retval == ERR_NO_APP_VERSION || retval==ERR_INSUFFICIENT_RESOURCE) { // if no app version found or no resources, give up completely! return retval; } } else { boinc_db.commit_transaction(); } if (retval) { double older=(now-t_max)/3600.0; if (t_min != INT_MIN) { double young=(now-t_min)/3600.0; if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_old_work() no feasible result younger than %.1f hours and older than %.1f hours\n", young, older ); } } else { if (config.debug_locality) { log_messages.printf(MSG_NORMAL, "[locality] send_old_work() no feasible result older than %.1f hours\n", older ); } } } // DAVID, YOU CHANGED THIS FROM VOID TO INT. IS THIS THE RIGHT // RETURN VAL? You should probably use the return value from // sent_results_for_file as well. return retval; }