// "avg" is the average PFC for this app
// over CPU versions or GPU versions, whichever is lowest.
// Update the pfc_scale of this app's versions in the DB,
// and update app.min_avg_pfc
//
int scale_versions(APP& app, double avg, SCHED_SHMEM* ssp) {
    char buf[256];
    int retval;

    for (int j=0; j<ssp->napp_versions; j++) {
        APP_VERSION& av = ssp->app_versions[j];
        if (av.appid != app.id) continue;
        if (av.pfc.n < MIN_VERSION_SAMPLES) continue;
        av.pfc_scale= avg/av.pfc.get_avg();

        DB_APP_VERSION dav;
        dav.id = av.id;
        sprintf(buf, "pfc_scale=%.15e", av.pfc_scale);
        retval = dav.update_field(buf);
        if (retval) return retval;
        if (config.debug_credit) {
            PLATFORM* p = ssp->lookup_platform_id(av.platformid);
            log_messages.printf(MSG_NORMAL,
                " updating scale factor for %d (%s %s)\n",
                av.id, p->name, av.plan_class
            );
            log_messages.printf(MSG_NORMAL,
                "  n: %g avg PFC: %g new scale: %g\n",
                av.pfc.n, av.pfc.get_avg(), av.pfc_scale
            );
        }
    }
    app.min_avg_pfc = avg;
    DB_APP da;
    da.id = app.id;
    sprintf(buf, "min_avg_pfc=%.15e", avg);
    retval = da.update_field(buf);
    if (retval) return retval;
    return 0;
}
DB_APP_VERSION* av_lookup(int id, vector<DB_APP_VERSION>& app_versions) {
    for (unsigned int i=0; i<app_versions.size(); i++) {
        if (app_versions[i].id == id) {
            return &app_versions[i];
        }
    }
    DB_APP_VERSION av;
    int retval = av.lookup_id(id);
    if (retval) return NULL;
    app_versions.push_back(av);
    return &(app_versions[app_versions.size()-1]);
}
Exemple #3
0
// return total project FLOPS (based on recent credit)
//
int get_project_flops(double& total) {
    DB_APP_VERSION av;
    char buf[256];

    // compute credit per day
    //
    sprintf(buf, "where expavg_time > %f", dtime() - 30*86400);
    total = 0;
    while (1) {
        int retval = av.enumerate(buf);
        if (retval == ERR_DB_NOT_FOUND) break;
        if (retval) {
            return retval;
        }
        total += av.expavg_credit;
    }
    total /= COBBLESTONE_SCALE;     // convert to FLOPs per day
    total /= 86400;                 // convert to FLOPs per second
    return 0;
}
// Update app version scale factors,
// and find the min average PFC for each app.
// Called periodically from the master feeder.
//
int update_av_scales(SCHED_SHMEM* ssp) {
    int i, j, retval;
    if (config.debug_credit) {
        log_messages.printf(MSG_NORMAL, "-- updating app version scales --\n");
    }
    for (i=0; i<ssp->napps; i++) {
        APP& app = ssp->apps[i];
        if (config.debug_credit) {
            log_messages.printf(MSG_NORMAL, "app %s (%d)\n", app.name, app.id);
        }
        RSC_INFO cpu_info, gpu_info;

        // find the average PFC of CPU and GPU versions

        for (j=0; j<ssp->napp_versions; j++) {
            APP_VERSION& avr = ssp->app_versions[j];
            if (avr.appid != app.id) continue;
            DB_APP_VERSION av;
            retval = av.lookup_id(avr.id);
            if (retval) return retval;
            avr = av;       // update shared mem array
            if (app_plan_uses_gpu(av.plan_class)) {
                if (config.debug_credit) {
                    log_messages.printf(MSG_NORMAL,
                        "add to gpu totals: (%d %s) %g %g\n",
                        av.id, av.plan_class, av.pfc.n, av.pfc.get_avg()
                    );
                }
                gpu_info.update(av);
            } else {
                if (config.debug_credit) {
                    log_messages.printf(MSG_NORMAL,
                        "add to cpu totals: (%d %s) %g %g\n",
                        av.id, av.plan_class, av.pfc.n, av.pfc.get_avg()
                    );
                }
                cpu_info.update(av);
            }
        }

        // If there are both CPU and GPU versions,
        // and at least 1 of each is above threshold,
        // normalize to the min of the averages
        // 
        // Otherwise, if either CPU or GPU has at least
        // 2 versions above threshold, normalize to the average
        //
        if (cpu_info.nvers_thresh && gpu_info.nvers_thresh) {
            if (config.debug_credit) {
                log_messages.printf(MSG_NORMAL,
                    "CPU avg: %g; GPU avg: %g\n",
                    cpu_info.avg(), gpu_info.avg()
                );
            }
            scale_versions(app,
                cpu_info.avg()<gpu_info.avg()?cpu_info.avg():gpu_info.avg(),
                ssp
            );
        } else if (cpu_info.nvers_thresh > 1) {
            log_messages.printf(MSG_NORMAL,
                "CPU avg: %g\n", cpu_info.avg()
            );
            scale_versions(app, cpu_info.avg(), ssp);
        } else if (gpu_info.nvers_thresh > 0) {
            log_messages.printf(MSG_NORMAL,
                "GPU avg: %g\n", gpu_info.avg()
            );
            scale_versions(app, gpu_info.avg()*DEFAULT_GPU_SCALE, ssp);
        }

    }
    if (config.debug_credit) {
        log_messages.printf(MSG_NORMAL, "-------------\n");
    }
    return 0;
}
Exemple #5
0
// The WU is already committed to an app version.
// - check if this host supports that platform
// - if plan class, check if this host can handle it
// - check if we need work for the resource
//
// If all these are satisfied, return a pointer to a BEST_APP_VERSION struct
// with HOST_USAGE filled in correctly.
// Else return NULL.
//
static BEST_APP_VERSION* check_homogeneous_app_version(
    WORKUNIT& wu, bool /* reliable_only */
    // TODO: enforce reliable_only
) {
    BEST_APP_VERSION bav;

    bool found;
    APP_VERSION *avp = ssp->lookup_app_version(wu.app_version_id);
    if (!avp) {
        // If the app version is not in shmem,
        // it's been superceded or deprecated.
        // Use it anyway.
        // Keep an array of such app versions in
        // SCHEDULER_REPLY::old_app_versions
        //
        found = false;
        for (unsigned int i=0; i<g_reply->old_app_versions.size(); i++) {
            APP_VERSION& av = g_reply->old_app_versions[i];
            if (av.id == wu.app_version_id) {
                avp = &av;
                found = true;
                break;
            }
        }
        if (!found) {
            DB_APP_VERSION av;
            int retval = av.lookup_id(wu.app_version_id);
            if (retval) return NULL;
            g_reply->old_app_versions.push_back(av);
            avp = &(g_reply->old_app_versions.back());
        }
    }

    // see if this host supports the version's platform
    //
    found = false;
    for (unsigned int i=0; i<g_request->platforms.list.size(); i++) {
        PLATFORM* p = g_request->platforms.list[i];
        if (p->id == avp->platformid) {
            found = true;
            bav.avp = avp;
            break;
        }
    }
    if (!found) return NULL;

    // and see if it supports the plan class
    //
    if (strlen(avp->plan_class)) {
        if (!app_plan(*g_request, avp->plan_class, bav.host_usage)) {
            return NULL;
        }
    } else {
        bav.host_usage.sequential_app(capped_host_fpops());
    }

    // and see if the client is asking for this resource
    //
    if (!need_this_resource(bav.host_usage, avp, NULL)) {
        return NULL;
    }

    // dynamically allocate the BEST_APP_VERSION.
    // This is a memory leak, but that's OK
    //
    BEST_APP_VERSION* bavp = new BEST_APP_VERSION;
    *bavp = bav;
    return bavp;
}