Example #1
0
// show a notice if we can't get work from this project,
// and there's something the user could do about it.
//
void PROJECT::show_no_work_notice() {
    bool some_banned = false;
    for (int i=0; i<coprocs.n_rsc; i++) {
        if (no_rsc_apps[i]) continue;
        bool banned_by_user = no_rsc_pref[i] || no_rsc_config[i];
        if (!gstate.acct_mgr_info.dynamic) {
            // dynamic account managers manage rsc usage themselves, not user
            //
            banned_by_user = banned_by_user || no_rsc_ams[i];
            // note to self: ||= doesn't exist
        }
        if (!banned_by_user) {
            continue;
        }
        string x;
        x = NO_WORK_MSG;
        x += " ";
        x += rsc_name_long(i);
        x += ".  ";
        x += _("To fix this, you can ");

        bool first = true;
        if (no_rsc_pref[i]) {
            x += _("change Project Preferences on the project's web site");
            first = false;
        }
        if (no_rsc_config[i]) {
            if (!first) x += ", or ";
            x += _("remove GPU exclusions in your cc_config.xml file");
            first = false;
        }
        if (no_rsc_ams[i] && !gstate.acct_mgr_info.dynamic) {
            if (!first) x += ", or ";
            x += _("change your settings at your account manager web site");
        }
        x += ".";
        msg_printf(this, MSG_USER_ALERT, "%s", x.c_str());
        some_banned = true;
    }
    if (!some_banned) {
        notices.remove_notices(this, REMOVE_NO_WORK_MSG);
        return;
    }

}
Example #2
0
// Write a description of this result to "out" as a <result> XML element,
// in the form used by the GUI.
// Always returns 0.
//
// Side effect: if "resources" is empty on entry it is filled in
// with a human-readable summary of the app version's CPU/GPU usage,
// so that the string is computed only once per result.
//
int RESULT::write_gui(MIOFILE& out) {
    // fixed part: always written.
    // Note: <estimated_cpu_time_remaining> carries
    // the value of estimated_runtime_remaining()
    //
    out.printf(
        "<result>\n"
        "    <name>%s</name>\n"
        "    <wu_name>%s</wu_name>\n"
        "    <version_num>%d</version_num>\n"
        "    <plan_class>%s</plan_class>\n"
        "    <project_url>%s</project_url>\n"
        "    <final_cpu_time>%f</final_cpu_time>\n"
        "    <final_elapsed_time>%f</final_elapsed_time>\n"
        "    <exit_status>%d</exit_status>\n"
        "    <state>%d</state>\n"
        "    <report_deadline>%f</report_deadline>\n"
        "    <received_time>%f</received_time>\n"
        "    <estimated_cpu_time_remaining>%f</estimated_cpu_time_remaining>\n",
        name,
        wu_name,
        version_num,
        plan_class,
        project->master_url,
        final_cpu_time,
        final_elapsed_time,
        exit_status,
        state(),
        report_deadline,
        received_time,
        estimated_runtime_remaining()
    );

    // optional flags: each element is written only if the condition holds
    //
    if (got_server_ack) out.printf("    <got_server_ack/>\n");
    if (ready_to_report) out.printf("    <ready_to_report/>\n");
    if (completed_time) out.printf("    <completed_time>%f</completed_time>\n", completed_time);
    if (suspended_via_gui) out.printf("    <suspended_via_gui/>\n");
    if (project->suspended_via_gui) out.printf("    <project_suspended_via_gui/>\n");
    if (report_immediately) out.printf("    <report_immediately/>\n");
    if (edf_scheduled) out.printf("    <edf_scheduled/>\n");
    if (coproc_missing) out.printf("    <coproc_missing/>\n");
    if (schedule_backoff > gstate.now) {
        // backoff hasn't expired yet; include the reason if there is one
        //
        out.printf("    <scheduler_wait/>\n");
        if (strlen(schedule_backoff_reason)) {
            out.printf(
                "    <scheduler_wait_reason>%s</scheduler_wait_reason>\n",
                schedule_backoff_reason
            );
        }
    }
    if (avp->needs_network && gstate.network_suspended) out.printf("    <network_wait/>\n");

    // if there's an active task for this result, let it add its own elements
    //
    ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this);
    if (atp) {
        atp->write_gui(out);
    }

    if (!strlen(resources)) {
        // only need to compute this string once
        //
        // NOTE(review): these sprintf() calls are unbounded;
        // the declaration of "resources" isn't visible here,
        // so its capacity is assumed to be sufficient - confirm
        //
        if (avp->gpu_usage.rsc_type) {
            if (avp->gpu_usage.usage == 1) {
                sprintf(resources,
                    "%.3g CPUs + 1 %s",
                    avp->avg_ncpus,
                    rsc_name_long(avp->gpu_usage.rsc_type)
                );
            } else {
                sprintf(resources,
                    "%.3g CPUs + %.3g %ss",
                    avp->avg_ncpus,
                    avp->gpu_usage.usage,
                    rsc_name_long(avp->gpu_usage.rsc_type)
                );
            }
        } else if (avp->missing_coproc) {
            sprintf(resources, "%.3g CPUs + %s GPU (missing)",
                avp->avg_ncpus, avp->missing_coproc_name
            );
        } else if (!project->non_cpu_intensive && (avp->avg_ncpus != 1)) {
            sprintf(resources, "%.3g CPUs", avp->avg_ncpus);
        } else {
            // a single blank marks "computed, nothing to show":
            // it's non-empty, so this branch isn't taken again,
            // but fails the length>1 test below
            //
            strcpy(resources, " ");
        }
    }
    if (strlen(resources)>1) {
        // buf holds an optional " (device a/b/...)" suffix
        //
        char buf[256];
        buf[0] = 0;
        if (atp && atp->scheduler_state == CPU_SCHED_SCHEDULED) {
            if (avp->gpu_usage.rsc_type) {
                COPROC& cp = coprocs.coprocs[avp->gpu_usage.rsc_type];
                if (cp.count > 1) {
                    // if there are multiple GPUs of this type,
                    // show the user which one(s) are being used
                    //
                    int n = (int)ceil(avp->gpu_usage.usage);

                    // Build the list with bounded appends so that
                    // a long list is truncated rather than overrunning buf.
                    // One byte is held back for the closing ")".
                    //
                    const size_t cap = sizeof(buf) - 1;
                    strcpy(buf, " (device ");
                    size_t len = strlen(buf);
                    for (int i=0; i<n; i++) {
                        int k = snprintf(
                            buf+len, cap-len, "%s%d",
                            i ? "/" : "",
                            cp.device_nums[coproc_indices[i]]
                        );
                        if (k < 0 || (size_t)k >= cap-len) {
                            // entry didn't fit: drop the partial text and stop
                            //
                            buf[len] = 0;
                            break;
                        }
                        len += (size_t)k;
                    }
                    buf[len++] = ')';
                    buf[len] = 0;
                }
            }
        }
        out.printf(
            "    <resources>%s%s</resources>\n", resources, buf
        );
    }
    out.printf("</result>\n");
    return 0;
}
Example #3
0
// Run the simulation:
// write a summary of hardware, preferences, scheduling policies,
// jobs and simulation parameters to summary_file,
// then advance gstate.now from START_TIME in steps of "delta"
// until it exceeds START_TIME + duration,
// running the client's scheduling/polling functions at each step.
//
void simulate() {
    // Must be initialized: it's first used with |= in the main loop,
    // and reading an uninitialized bool is undefined behavior.
    //
    bool action = false;
    double start = START_TIME;
    gstate.now = start;
    html_start();
    fprintf(summary_file,
        "Hardware summary\n   %d CPUs, %.1f GFLOPS\n",
        gstate.host_info.p_ncpus, gstate.host_info.p_fpops/1e9
    );
    // NOTE(review): the loop starts at 1, presumably because
    // resource 0 is the CPU, which is reported above - confirm
    //
    for (int i=1; i<coprocs.n_rsc; i++) {
        fprintf(summary_file,
            "   %d %s GPUs, %.1f GFLOPS\n",
            coprocs.coprocs[i].count,
            coprocs.coprocs[i].type,
            coprocs.coprocs[i].peak_flops/1e9
        );
    }
    fprintf(summary_file,
        "Preferences summary\n"
        "   work buf min %f max %f\n"
        "   Scheduling period %f\n"
        "Scheduling policies\n"
        "   Round-robin only: %s\n"
        "   Scheduler EDF simulation: %s\n"
        "   REC half-life: %f\n",
        gstate.work_buf_min(), gstate.work_buf_total(),
        gstate.global_prefs.cpu_scheduling_period(),
        cpu_sched_rr_only?"yes":"no",
        server_uses_workload?"yes":"no",
        cc_config.rec_half_life
    );
    fprintf(summary_file, "Jobs\n");
    for (unsigned int i=0; i<gstate.results.size(); i++) {
        RESULT* rp = gstate.results[i];
        // "time left" is remaining FLOPs divided by the app version's FLOPS;
        // the deadline is shown relative to the simulation start time
        //
        fprintf(summary_file,
            "   %s %s (%s)\n      time left %s deadline %s\n",
            rp->project->project_name,
            rp->name,
            rsc_name_long(rp->avp->gpu_usage.rsc_type),
            timediff_format(rp->sim_flops_left/rp->avp->flops).c_str(),
            timediff_format(rp->report_deadline - START_TIME).c_str()
        );
    }
    fprintf(summary_file,
        "Simulation parameters\n"
        "   time step %f, duration %f\n"
        "-------------------\n",
        delta, duration
    );

    write_inputs();

    while (1) {
        // sample the availability processes for this time step.
        // "active" and "connected" are sampled only while "on";
        // "gpu_active" only while "active".
        //
        on = on_proc.sample(delta);
        if (on) {
            active = active_proc.sample(delta);
            if (active) {
                gpu_active = gpu_active_proc.sample(delta);
            } else {
                gpu_active = false;
            }
            connected = connected_proc.sample(delta);
        } else {
            active = gpu_active = connected = false;
        }
        // do accounting for the period that just ended,
        // even if we're now in an "off" state.
        //
        // need both of the following, else crash
        //
        // (the value accumulated in "action" here is not used:
        // it's reset below when "on", and not read otherwise)
        //
        action |= gstate.active_tasks.poll();
        action |= gstate.handle_finished_apps();
        if (on) {
            // repeat until a full pass does nothing
            //
            while (1) {
                action = false;
                action |= gstate.schedule_cpus();
                if (connected) {
                    action |= gstate.scheduler_rpc_poll();
                        // this deletes completed results
                }
                action |= gstate.active_tasks.poll();
                action |= gstate.handle_finished_apps();
                gpu_suspend_reason = gpu_active?0:1;
                //msg_printf(0, MSG_INFO, action?"did action":"did no action");
                if (!action) break;
            }
        }
        //msg_printf(0, MSG_INFO, "took time step");

        // credit this time step to every task that is executing
        //
        for (unsigned int i=0; i<gstate.active_tasks.active_tasks.size(); i++) {
            ACTIVE_TASK* atp = gstate.active_tasks.active_tasks[i];
            if (atp->task_state() == PROCESS_EXECUTING) {
                atp->elapsed_time += delta;
            }
        }
        html_rec();
        write_recs();
        gstate.now += delta;
        if (gstate.now > start + duration) break;
    }
    html_end();
}