// "avg" is the average PFC for this app // over CPU versions or GPU versions, whichever is lowest. // Update the pfc_scale of this app's versions in the DB, // and update app.min_avg_pfc // int scale_versions(APP& app, double avg, SCHED_SHMEM* ssp) { char buf[256]; int retval; for (int j=0; j<ssp->napp_versions; j++) { APP_VERSION& av = ssp->app_versions[j]; if (av.appid != app.id) continue; if (av.pfc.n < MIN_VERSION_SAMPLES) continue; av.pfc_scale= avg/av.pfc.get_avg(); DB_APP_VERSION dav; dav.id = av.id; sprintf(buf, "pfc_scale=%.15e", av.pfc_scale); retval = dav.update_field(buf); if (retval) return retval; if (config.debug_credit) { PLATFORM* p = ssp->lookup_platform_id(av.platformid); log_messages.printf(MSG_NORMAL, " updating scale factor for %d (%s %s)\n", av.id, p->name, av.plan_class ); log_messages.printf(MSG_NORMAL, " n: %g avg PFC: %g new scale: %g\n", av.pfc.n, av.pfc.get_avg(), av.pfc_scale ); } } app.min_avg_pfc = avg; DB_APP da; da.id = app.id; sprintf(buf, "min_avg_pfc=%.15e", avg); retval = da.update_field(buf); if (retval) return retval; return 0; }
// Find the app version with the given ID.
// "app_versions" is searched first; if the ID is not there,
// the record is looked up in the DB and appended to the vector.
// Returns a pointer to the matching vector element,
// or NULL if the DB lookup fails.
//
// NOTE: the result points into the vector, so it can be invalidated
// by a later call that appends to the same vector.
//
DB_APP_VERSION* av_lookup(int id, vector<DB_APP_VERSION>& app_versions) {
    vector<DB_APP_VERSION>::iterator it;
    for (it = app_versions.begin(); it != app_versions.end(); ++it) {
        if (it->id == id) {
            return &(*it);
        }
    }

    // not cached yet: fetch from the DB and remember it
    DB_APP_VERSION fetched;
    if (fetched.lookup_id(id)) {
        return NULL;
    }
    app_versions.push_back(fetched);
    return &app_versions.back();
}
// return total project FLOPS (based on recent credit) // int get_project_flops(double& total) { DB_APP_VERSION av; char buf[256]; // compute credit per day // sprintf(buf, "where expavg_time > %f", dtime() - 30*86400); total = 0; while (1) { int retval = av.enumerate(buf); if (retval == ERR_DB_NOT_FOUND) break; if (retval) { return retval; } total += av.expavg_credit; } total /= COBBLESTONE_SCALE; // convert to FLOPs per day total /= 86400; // convert to FLOPs per second return 0; }
// Update app version scale factors, // and find the min average PFC for each app. // Called periodically from the master feeder. // int update_av_scales(SCHED_SHMEM* ssp) { int i, j, retval; if (config.debug_credit) { log_messages.printf(MSG_NORMAL, "-- updating app version scales --\n"); } for (i=0; i<ssp->napps; i++) { APP& app = ssp->apps[i]; if (config.debug_credit) { log_messages.printf(MSG_NORMAL, "app %s (%d)\n", app.name, app.id); } RSC_INFO cpu_info, gpu_info; // find the average PFC of CPU and GPU versions for (j=0; j<ssp->napp_versions; j++) { APP_VERSION& avr = ssp->app_versions[j]; if (avr.appid != app.id) continue; DB_APP_VERSION av; retval = av.lookup_id(avr.id); if (retval) return retval; avr = av; // update shared mem array if (app_plan_uses_gpu(av.plan_class)) { if (config.debug_credit) { log_messages.printf(MSG_NORMAL, "add to gpu totals: (%d %s) %g %g\n", av.id, av.plan_class, av.pfc.n, av.pfc.get_avg() ); } gpu_info.update(av); } else { if (config.debug_credit) { log_messages.printf(MSG_NORMAL, "add to cpu totals: (%d %s) %g %g\n", av.id, av.plan_class, av.pfc.n, av.pfc.get_avg() ); } cpu_info.update(av); } } // If there are both CPU and GPU versions, // and at least 1 of each is above threshold, // normalize to the min of the averages // // Otherwise, if either CPU or GPU has at least // 2 versions above threshold, normalize to the average // if (cpu_info.nvers_thresh && gpu_info.nvers_thresh) { if (config.debug_credit) { log_messages.printf(MSG_NORMAL, "CPU avg: %g; GPU avg: %g\n", cpu_info.avg(), gpu_info.avg() ); } scale_versions(app, cpu_info.avg()<gpu_info.avg()?cpu_info.avg():gpu_info.avg(), ssp ); } else if (cpu_info.nvers_thresh > 1) { log_messages.printf(MSG_NORMAL, "CPU avg: %g\n", cpu_info.avg() ); scale_versions(app, cpu_info.avg(), ssp); } else if (gpu_info.nvers_thresh > 0) { log_messages.printf(MSG_NORMAL, "GPU avg: %g\n", gpu_info.avg() ); scale_versions(app, gpu_info.avg()*DEFAULT_GPU_SCALE, ssp); } } if (config.debug_credit) 
{ log_messages.printf(MSG_NORMAL, "-------------\n"); } return 0; }
// The WU is already committed to an app version. // - check if this host supports that platform // - if plan class, check if this host can handle it // - check if we need work for the resource // // If all these are satisfied, return a pointer to a BEST_APP_VERSION struct // with HOST_USAGE filled in correctly. // Else return NULL. // static BEST_APP_VERSION* check_homogeneous_app_version( WORKUNIT& wu, bool /* reliable_only */ // TODO: enforce reliable_only ) { BEST_APP_VERSION bav; bool found; APP_VERSION *avp = ssp->lookup_app_version(wu.app_version_id); if (!avp) { // If the app version is not in shmem, // it's been superceded or deprecated. // Use it anyway. // Keep an array of such app versions in // SCHEDULER_REPLY::old_app_versions // found = false; for (unsigned int i=0; i<g_reply->old_app_versions.size(); i++) { APP_VERSION& av = g_reply->old_app_versions[i]; if (av.id == wu.app_version_id) { avp = &av; found = true; break; } } if (!found) { DB_APP_VERSION av; int retval = av.lookup_id(wu.app_version_id); if (retval) return NULL; g_reply->old_app_versions.push_back(av); avp = &(g_reply->old_app_versions.back()); } } // see if this host supports the version's platform // found = false; for (unsigned int i=0; i<g_request->platforms.list.size(); i++) { PLATFORM* p = g_request->platforms.list[i]; if (p->id == avp->platformid) { found = true; bav.avp = avp; break; } } if (!found) return NULL; // and see if it supports the plan class // if (strlen(avp->plan_class)) { if (!app_plan(*g_request, avp->plan_class, bav.host_usage)) { return NULL; } } else { bav.host_usage.sequential_app(capped_host_fpops()); } // and see if the client is asking for this resource // if (!need_this_resource(bav.host_usage, avp, NULL)) { return NULL; } // dynamically allocate the BEST_APP_VERSION. // This is a memory leak, but that's OK // BEST_APP_VERSION* bavp = new BEST_APP_VERSION; *bavp = bav; return bavp; }