Example #1
0
// Decide what jobs to include in the simulation;
// build the "pending" lists for each (project, processor type) pair.
// NOTE: "results" is sorted by increasing arrival time.
//
void RR_SIM::init_pending_lists() {
    for (unsigned int i=0; i<gstate.projects.size(); i++) {
        PROJECT* p = gstate.projects[i];
        for (int j=0; j<coprocs.n_rsc; j++) {
            p->rsc_pwf[j].pending.clear();
            p->rsc_pwf[j].queue_est = 0;
        }
    }
    for (unsigned int i=0; i<gstate.results.size(); i++) {
        RESULT* rp = gstate.results[i];
        rp->rr_sim_misses_deadline = false;
        rp->already_selected = false;
        if (!rp->nearly_runnable()) continue;
        if (rp->some_download_stalled()) continue;
        if (rp->project->non_cpu_intensive) continue;
        rp->rrsim_flops_left = rp->estimated_flops_remaining();

        //if (rp->rrsim_flops_left <= 0) continue;
            // job may have fraction_done=1 but not be done;
            // if it's past its deadline, we need to mark it as such

        PROJECT* p = rp->project;
        p->pwf.n_runnable_jobs++;
        p->rsc_pwf[0].nused_total += rp->avp->avg_ncpus;
        set_rrsim_flops(rp);
        int rt = rp->avp->gpu_usage.rsc_type;
        if (rt) {
            p->rsc_pwf[rt].nused_total += rp->avp->gpu_usage.usage;
            p->rsc_pwf[rt].n_runnable_jobs++;
            p->rsc_pwf[rt].queue_est += rp->rrsim_flops_left/rp->rrsim_flops;
        }
        p->rsc_pwf[rt].pending.push_back(rp);
        rp->rrsim_done = false;
    }
}
Example #2
0
void CLIENT_STATE::compute_nuploading_results() {
    unsigned int i;

    for (i=0; i<projects.size(); i++) {
        projects[i]->nuploading_results = 0;
        projects[i]->too_many_uploading_results = false;
    }
    for (i=0; i<results.size(); i++) {
        RESULT* rp = results[i];
        if (rp->state() == RESULT_FILES_UPLOADING) {
            rp->project->nuploading_results++;
        }
    }
    int n = gstate.ncpus;
    for (int j=1; j<coprocs.n_rsc; j++) {
        if (coprocs.coprocs[j].count > n) {
            n = coprocs.coprocs[j].count;
        }
    }
    n *= 2;
    for (i=0; i<projects.size(); i++) {
        if (projects[i]->nuploading_results > n) {
            projects[i]->too_many_uploading_results = true;
        }
    }
}
void CLIENT_STATE::compute_nuploading_results() {
    unsigned int i;

    for (i=0; i<projects.size(); i++) {
        projects[i]->nuploading_results = 0;
        projects[i]->too_many_uploading_results = false;
    }
    for (i=0; i<results.size(); i++) {
        RESULT* rp = results[i];
        if (rp->state() == RESULT_FILES_UPLOADING) {
            rp->project->nuploading_results++;
        }
    }
    int n = gstate.ncpus;
    if (gstate.host_info.have_cuda() && gstate.host_info.coprocs.cuda.count > n) {
        n = gstate.host_info.coprocs.cuda.count;
    }
    if (gstate.host_info.have_ati() && gstate.host_info.coprocs.ati.count > n) {
        n = gstate.host_info.coprocs.ati.count;
    }
    n *= 2;
    for (i=0; i<projects.size(); i++) {
        if (projects[i]->nuploading_results > n) {
            projects[i]->too_many_uploading_results = true;
        }
    }
}
Example #4
0
void PROJECT::abort_not_started() {
    for (unsigned int i=0; i<gstate.results.size(); i++) {
        RESULT* rp = gstate.results[i];
        if (rp->project != this) continue;
        if (rp->is_not_started()) {
            rp->abort_inactive(EXIT_ABORTED_VIA_GUI);
        }
    }
}
Example #5
0
bool PROJECT::downloading() {
    if (suspended_via_gui) return false;
    for (unsigned int i=0; i<gstate.results.size(); i++) {
        RESULT* rp = gstate.results[i];
        if (rp->project != this) continue;
        if (rp->downloading()) return true;
    }
    return false;
}
Example #6
0
// Called when the upload of a file has finished.
// Every active task whose result references the file is notified.
//
void ACTIVE_TASK_SET::upload_notify_app(FILE_INFO* fip) {
    for (size_t k=0; k<active_tasks.size(); ++k) {
        ACTIVE_TASK* task = active_tasks[k];
        FILE_REF* ref = task->result->lookup_file(fip);
        if (!ref) continue;     // this task's result doesn't use the file
        task->upload_notify_app(fip, ref);
    }
}
Example #7
0
// Does the project have a downloading or runnable job,
// i.e. a result whose state is at most RESULT_FILES_DOWNLOADED?
//
static bool has_a_job(PROJECT* p) {
    for (size_t k=0; k<gstate.results.size(); ++k) {
        RESULT* job = gstate.results[k];
        if (job->project == p && job->state() <= RESULT_FILES_DOWNLOADED) {
            return true;
        }
    }
    return false;
}
Example #8
0
// Append an IP_RESULT to ip_results for every result that still has
// runtime remaining (name, time until report deadline, remaining runtime),
// then compute an initial schedule for them.
//
void CLIENT_STATE::get_workload(vector<IP_RESULT>& ip_results) {
    for (size_t k=0; k<results.size(); ++k) {
        RESULT* job = results[k];
        const double remaining = job->estimated_runtime_remaining();
        if (remaining != 0) {
            ip_results.push_back(
                IP_RESULT(job->name, job->report_deadline-now, remaining)
            );
        }
    }
    // the schedule is initialized with a zero first argument;
    // an earlier form passed work_buf_min() instead
    //
    init_ip_results(0, ncpus, ip_results);
}
Example #9
0
// Choose a project to fetch work from,
// and set the request fields of resource objects.
// Returns the chosen project, or 0 if none was chosen.
//
PROJECT* WORK_FETCH::choose_project(
    bool strict_hyst,
    PROJECT* backoff_exempt_project
) {
    if (log_flags.work_fetch_debug) {
        msg_printf(0, MSG_INFO, "[work_fetch] work fetch start");
    }

    // a non-CPU-intensive project that needs work wins immediately
    //
    PROJECT* chosen = non_cpu_intensive_project_needing_work();
    if (chosen) {
        return chosen;
    }

    gstate.compute_nuploading_results();

    rr_simulation();
    compute_shares();
    project_priority_init(true);

    // Lower the priority of projects that have a lot of work queued:
    // for each queued job subtract
    // (FLOPs remaining for the job)/(FLOPs of max queue)
    // from its project's priority;
    // per project the total will generally be between 0 and 1.
    // This is a little arbitrary but I can't think of anything better.
    //
    const double max_queued_flops = gstate.work_buf_total()*total_peak_flops();
    for (size_t k=0; k<gstate.results.size(); ++k) {
        RESULT* job = gstate.results[k];
        PROJECT* owner = job->project;
        owner->sched_priority -= job->estimated_flops_remaining()/max_queued_flops;
    }

    // try coprocessor resources (index 1 and up) first, in index order;
    // fall back to resource 0 if none of them yields a project
    //
    chosen = 0;
    if (gpus_usable) {
        for (int rsc=1; rsc<coprocs.n_rsc && !chosen; ++rsc) {
            chosen = rsc_work_fetch[rsc].choose_project_hyst(strict_hyst, backoff_exempt_project);
        }
    }
    if (!chosen) {
        chosen = rsc_work_fetch[0].choose_project_hyst(strict_hyst, backoff_exempt_project);
    }

    if (log_flags.work_fetch_debug) {
        print_state();
        if (!chosen) {
            msg_printf(0, MSG_INFO, "[work_fetch] No project chosen for work fetch");
        }
    }

    return chosen;
}
Example #10
0
// Count the results of project p that use resource type rsc_type.
// Results whose state is before RESULT_FILES_UPLOADED are counted
// in in_progress, all others in done.
// Both output counters are reset to zero first.
//
void job_count(PROJECT* p, int rsc_type, int& in_progress, int& done) {
    done = 0;
    in_progress = done;
    for (size_t k=0; k<gstate.results.size(); ++k) {
        RESULT* job = gstate.results[k];
        if (job->project != p || job->resource_type() != rsc_type) {
            continue;
        }
        int& counter = (job->state() < RESULT_FILES_UPLOADED) ? in_progress : done;
        counter++;
    }
}
Example #11
0
bool PROJECT::runnable(int rsc_type) {
    if (suspended_via_gui) return false;
    for (unsigned int i=0; i<gstate.results.size(); i++) {
        RESULT* rp = gstate.results[i];
        if (rp->project != this) continue;
        if (rsc_type != RSC_TYPE_ANY) {
            if (rp->avp->gpu_usage.rsc_type != rsc_type) {
                continue;
            }
        }
        if (rp->runnable()) return true;
    }
    return false;
}
Example #12
0
void PROJECT::get_task_durs(double& not_started_dur, double& in_progress_dur) {
    not_started_dur = 0;
    in_progress_dur = 0;
    for (unsigned int i=0; i<gstate.results.size(); i++) {
        RESULT* rp = gstate.results[i];
        if (rp->project != this) continue;
        double d = rp->estimated_runtime_remaining();
        d /= gstate.time_stats.availability_frac(rp->avp->gpu_usage.rsc_type);
        if (rp->is_not_started()) {
            not_started_dur += d;
        } else {
            in_progress_dur += d;
        }
    }
}
Example #13
0
// Write the GUI description of tasks to f:
// only the results of active tasks if active_only is set,
// otherwise all results. Always returns 0.
//
int CLIENT_STATE::write_tasks_gui(MIOFILE& f, bool active_only) {
    if (!active_only) {
        for (size_t k=0; k<results.size(); ++k) {
            results[k]->write_gui(f);
        }
        return 0;
    }

    for (size_t k=0; k<active_tasks.active_tasks.size(); ++k) {
        active_tasks.active_tasks[k]->result->write_gui(f);
    }
    return 0;
}
// GUI RPC handler for operations on a single task.
// Parses "name" and "project_url" from the request, looks up the task,
// and applies op: "abort", "suspend" or "resume".
// Any other op leaves the task untouched; the reschedule request,
// state-dirty flag and <success/> reply are issued regardless.
//
static void handle_result_op(GUI_RPC_CONN& grc, const char* op) {
    char result_name[256];
    string project_url;

    result_name[0] = 0;
    while (!grc.xp.get_tag()) {
        if (!grc.xp.parse_str("name", result_name, sizeof(result_name))) {
            grc.xp.parse_string("project_url", project_url);
        }
    }

    // no reply is written here if the lookup fails
    // (presumably get_project() reports the error itself -- TODO confirm)
    //
    PROJECT* p = get_project(grc, project_url);
    if (!p) return;

    if (result_name[0] == 0) {
        grc.mfout.printf("<error>Missing result name</error>\n");
        return;
    }

    RESULT* rp = gstate.lookup_result(p, result_name);
    if (!rp) {
        grc.mfout.printf("<error>no such result</error>\n");
        return;
    }

    if (strcmp(op, "abort") == 0) {
        msg_printf(p, MSG_INFO, "task %s aborted by user", result_name);
        // a task with an active-task entry is aborted through it;
        // otherwise the result itself is aborted
        //
        ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp);
        if (!atp) {
            rp->abort_inactive(EXIT_ABORTED_VIA_GUI);
        } else {
            atp->abort_task(EXIT_ABORTED_VIA_GUI, "aborted by user");
        }
        gstate.request_work_fetch("result aborted by user");
    } else if (strcmp(op, "suspend") == 0) {
        msg_printf(p, MSG_INFO, "task %s suspended by user", result_name);
        rp->suspended_via_gui = true;
        gstate.request_work_fetch("result suspended by user");
    } else if (strcmp(op, "resume") == 0) {
        msg_printf(p, MSG_INFO, "task %s resumed by user", result_name);
        rp->suspended_via_gui = false;
    }

    gstate.request_schedule_cpus("result suspended, resumed or aborted by user");
    gstate.set_client_state_dirty("Result RPC");
    grc.mfout.printf("<success/>\n");
}
// GUI RPC handler for operations on a single task (request-buffer form).
// Looks up the project and the task named by <name> in buf,
// and applies op: "abort", "suspend" or "resume".
// Any other op leaves the task untouched; the reschedule request,
// state-dirty flag and <success/> reply are issued regardless.
//
static void handle_result_op(char* buf, MIOFILE& fout, const char* op) {
    char result_name[256];

    PROJECT* p = get_project(buf, fout);
    if (!p) {
        fout.printf("<error>No such project</error>\n");
        return;
    }

    const bool have_name = parse_str(buf, "<name>", result_name, sizeof(result_name));
    if (!have_name) {
        fout.printf("<error>Missing result name</error>\n");
        return;
    }

    RESULT* rp = gstate.lookup_result(p, result_name);
    if (!rp) {
        fout.printf("<error>no such result</error>\n");
        return;
    }

    if (strcmp(op, "abort") == 0) {
        msg_printf(p, MSG_INFO, "task %s aborted by user", result_name);
        // a task with an active-task entry is aborted through it;
        // otherwise the result itself is aborted
        //
        ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp);
        if (!atp) {
            rp->abort_inactive(ERR_ABORTED_VIA_GUI);
        } else {
            atp->abort_task(ERR_ABORTED_VIA_GUI, "aborted by user");
        }
        gstate.request_work_fetch("result aborted by user");
    } else if (strcmp(op, "suspend") == 0) {
        msg_printf(p, MSG_INFO, "task %s suspended by user", result_name);
        rp->suspended_via_gui = true;
        gstate.request_work_fetch("result suspended by user");
    } else if (strcmp(op, "resume") == 0) {
        msg_printf(p, MSG_INFO, "task %s resumed by user", result_name);
        rp->suspended_via_gui = false;
    }

    gstate.request_schedule_cpus("result suspended, resumed or aborted by user");
    gstate.set_client_state_dirty("Result RPC");
    fout.printf("<success/>\n");
}
bool PROJECT::runnable(int rsc_type) {
    if (suspended_via_gui) return false;
    if (suspended_during_update) return false;
    for (unsigned int i=0; i<gstate.results.size(); i++) {
        RESULT* rp = gstate.results[i];
        if (rp->project != this) continue;
        switch (rsc_type) {
        case RSC_TYPE_ANY:
            break;
        case RSC_TYPE_CPU:
            if (rp->uses_coprocs()) continue;
            break;
        case RSC_TYPE_CUDA:
            if (rp->avp->ncudas == 0) continue;
            break;
        case RSC_TYPE_ATI:
            if (rp->avp->natis == 0) continue;
            break;
        }
        if (rp->runnable()) return true;
    }
    return false;
}
// Compute an "overall long-term debt" for each project.
// This is a sum of per-resource terms, scaled by the relative speed of the resource.
// The term for a resource is its LTD plus an estimate of queued work.
//
void WORK_FETCH::set_overall_debts() {
    unsigned int i;
    PROJECT* p;
    RESULT* rp;
    APP_VERSION* avp;

    for (i=0; i<gstate.projects.size(); i++) {
        p = gstate.projects[i];
        p->cpu_pwf.queue_est = 0;
        p->cuda_pwf.queue_est = 0;
        p->ati_pwf.queue_est = 0;
    }
    for (i=0; i<gstate.results.size(); i++) {
        rp = gstate.results[i];
        p = rp->project;
        if (!rp->nearly_runnable()) continue;
        if (p->non_cpu_intensive) continue;
        double dt = rp->estimated_time_remaining();
        avp = rp->avp;
        p->cpu_pwf.queue_est += dt*avp->avg_ncpus;
        p->cuda_pwf.queue_est += dt*avp->ncudas;
        p->ati_pwf.queue_est += dt*avp->natis;
    }
    for (i=0; i<gstate.projects.size(); i++) {
        p = gstate.projects[i];
        double queue_debt = p->cpu_pwf.queue_est/gstate.ncpus;
        p->pwf.overall_debt = p->cpu_pwf.long_term_debt - queue_debt;
        if (gstate.host_info.have_cuda()) {
            p->pwf.overall_debt += cuda_work_fetch.relative_speed*
                (p->cuda_pwf.long_term_debt - p->cuda_pwf.queue_est/gstate.host_info.coprocs.cuda.count);
        }
        if (gstate.host_info.have_ati()) {
            p->pwf.overall_debt += ati_work_fetch.relative_speed*
                (p->ati_pwf.long_term_debt - p->ati_pwf.queue_est/gstate.host_info.coprocs.ati.count);
        }
    }
}
Example #18
0
// Advance all executing tasks by the time elapsed since the previous poll.
// For each executing task: add to its elapsed time, subtract the FLOPs
// done from rp->sim_flops_left, update fraction done, and mark the task
// exited (and its result ready to report) once no FLOPs are left.
// Also accumulates peak-FLOPS usage and per-project idle time.
//
// Does nothing and returns false if less than 1 second has passed
// since the last call that did work.
// Returns true if at least one job finished during this call.
//
bool ACTIVE_TASK_SET::poll() {
    unsigned int i;
    char buf[256];
    bool action = false;
    static double last_time = START_TIME;
    double diff = gstate.now - last_time;
    if (diff < 1.0) return false;
    last_time = gstate.now;
    // an interval longer than delta is not credited to anything
    // NOTE(review): presumably this discards a jump in the clock
    // rather than charging it to the running jobs -- confirm
    //
    if (diff > delta) {
        diff = 0;
    }
    PROJECT* p;

    // assume each project is idle until one of its jobs is seen executing
    //
    for (i=0; i<gstate.projects.size(); i++) {
        p = gstate.projects[i];
        p->idle = true;
    }

    // we do two kinds of FLOPs accounting:
    // 1) actual FLOPS (for job completion)
    // 2) peak FLOPS (for total and per-project resource usage)
    //
    // CPU may be overcommitted, in which case we compute
    //  a "cpu_scale" factor that is < 1.
    // GPUs are never overcommitted.
    //
    // actual FLOPS is based on app_version.flops, scaled by cpu_scale for CPU jobs
    // peak FLOPS is based on device peak FLOPS,
    //  with CPU component scaled by cpu_scale for all jobs

    // get CPU usage by GPU and CPU jobs
    // (GPU jobs only count while GPUs are active)
    //
    double cpu_usage_cpu=0;
    double cpu_usage_gpu=0;
    for (i=0; i<active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks[i];
        if (atp->task_state() != PROCESS_EXECUTING) continue;
        RESULT* rp = atp->result;
        if (rp->uses_gpu()) {
            if (gpu_active) {
                cpu_usage_gpu += rp->avp->avg_ncpus;
            }
        } else {
            cpu_usage_cpu += rp->avp->avg_ncpus;
        }
    }
    double cpu_usage = cpu_usage_cpu + cpu_usage_gpu;

    // if CPU is overcommitted, compute cpu_scale:
    // the factor by which CPU-job usage must shrink so that
    // GPU-job CPU usage plus scaled CPU-job usage equals ncpus
    //
    // NOTE(review): the denominator equals cpu_usage_cpu (up to rounding);
    // if the GPU jobs alone exceed ncpus and there is no CPU-job usage
    // this divides by zero, and if GPU-job usage exceeds ncpus
    // the scale is negative. Confirm whether that can occur.
    //
    double cpu_scale = 1;
    if (cpu_usage > gstate.ncpus) {
        cpu_scale = (gstate.ncpus - cpu_usage_gpu) / (cpu_usage - cpu_usage_gpu);
    }

    double used = 0;
    for (i=0; i<active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks[i];
        if (atp->task_state() != PROCESS_EXECUTING) continue;
        RESULT* rp = atp->result;
        // GPU jobs make no progress while GPUs are inactive
        //
        if (!gpu_active && rp->uses_gpu()) {
            continue;
        }
        atp->elapsed_time += diff;
        double flops = rp->avp->flops;
        if (!rp->uses_gpu()) {
            flops *= cpu_scale;
        }

        rp->sim_flops_left -= diff*flops;

        atp->fraction_done = 1 - rp->sim_flops_left / rp->wup->rsc_fpops_est;
        atp->checkpoint_wall_time = gstate.now;

        if (rp->sim_flops_left <= 0) {
            atp->set_task_state(PROCESS_EXITED, "poll");
            rp->exit_status = 0;
            rp->ready_to_report = true;
            gstate.request_schedule_cpus("job finished");
            gstate.request_work_fetch("job finished");
            // bounded write: the result name plus the fixed text
            // may not fit in buf, so truncate rather than overflow
            //
            snprintf(buf, sizeof(buf), "result %s finished<br>", rp->name);
            html_msg += buf;
            action = true;
        }
        double pf = diff * app_peak_flops(rp->avp, cpu_scale);
        rp->project->project_results.flops_used += pf;
        rp->peak_flop_count += pf;
        sim_results.flops_used += pf;
        used += pf;
        rp->project->idle = false;
    }

    // update idle-time statistics:
    // idle projects extend their current idle period,
    // all others have it reset
    //
    for (i=0; i<gstate.projects.size(); i++) {
        p = gstate.projects[i];
        if (p->idle) {
            p->idle_time += diff;
            p->idle_time_sumsq += diff*(p->idle_time*p->idle_time);
        } else {
            p->idle_time = 0;
        }
    }
    active_time += diff;
    if (gpu_active) {
        gpu_active_time += diff;
    }

    return action;
}
Example #19
0
// Write a scheduler request to a disk file,
// to be sent to a scheduling server
//
// The request for project p is written as XML to the file named by
// get_sched_request_filename().
// Side effects visible here: p->rpc_seqno is zeroed if p->hostid is zero,
// host info, ncpus and disk usage/shares are refreshed,
// request fields are copied into coprocs.nvidia / coprocs.ati,
// p->nresults_returned is recomputed,
// and avp->index is assigned for each of the project's app versions.
//
// Returns ERR_FOPEN if the request file can't be opened, else 0.
//
int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
    char buf[1024];
    MIOFILE mf;
    unsigned int i;
    RESULT* rp;

    get_sched_request_filename(*p, buf, sizeof(buf));
    FILE* f = boinc_fopen(buf, "wb");
    if (!f) return ERR_FOPEN;

    // compute this project's fraction of the total, runnable,
    // and potentially runnable resource share;
    // a fraction is 1 if the corresponding total is zero
    //
    double trs = total_resource_share();
    double rrs = runnable_resource_share(RSC_TYPE_ANY);
    double prrs = potentially_runnable_resource_share();
    double resource_share_fraction, rrs_fraction, prrs_fraction;
    if (trs) {
        resource_share_fraction = p->resource_share / trs;
    } else {
        resource_share_fraction = 1;
    }
    if (rrs) {
        rrs_fraction = p->resource_share / rrs;
    } else {
        rrs_fraction = 1;
    }
    if (prrs) {
        prrs_fraction = p->resource_share / prrs;
    } else {
        prrs_fraction = 1;
    }

    // if hostid is zero, rpc_seqno better be also
    //
    if (!p->hostid) {
        p->rpc_seqno = 0;
    }

    // mf wraps the same FILE as f;
    // output below goes through one or the other
    //
    mf.init_file(f);
    fprintf(f,
        "<scheduler_request>\n"
        "    <authenticator>%s</authenticator>\n"
        "    <hostid>%d</hostid>\n"
        "    <rpc_seqno>%d</rpc_seqno>\n"
        "    <core_client_major_version>%d</core_client_major_version>\n"
        "    <core_client_minor_version>%d</core_client_minor_version>\n"
        "    <core_client_release>%d</core_client_release>\n"
        "    <resource_share_fraction>%f</resource_share_fraction>\n"
        "    <rrs_fraction>%f</rrs_fraction>\n"
        "    <prrs_fraction>%f</prrs_fraction>\n"
        "    <duration_correction_factor>%f</duration_correction_factor>\n"
        "    <allow_multiple_clients>%d</allow_multiple_clients>\n"
        "    <sandbox>%d</sandbox>\n",
        p->authenticator,
        p->hostid,
        p->rpc_seqno,
        core_client_version.major,
        core_client_version.minor,
        core_client_version.release,
        resource_share_fraction,
        rrs_fraction,
        prrs_fraction,
        p->duration_correction_factor,
        config.allow_multiple_clients?1:0,
        g_use_sandbox?1:0
    );
    work_fetch.write_request(f, p);

    // write client capabilities
    //
    fprintf(f,
        "    <client_cap_plan_class>1</client_cap_plan_class>\n"
    );

    write_platforms(p, mf);

    if (strlen(p->code_sign_key)) {
        fprintf(f, "    <code_sign_key>\n%s\n</code_sign_key>\n", p->code_sign_key);
    }

    // send working prefs
    //
    fprintf(f, "<working_global_preferences>\n");
    global_prefs.write(mf);
    fprintf(f, "</working_global_preferences>\n");

    // send master global preferences if present and not host-specific
    // (the prefs file is copied verbatim; the email hash of the project
    // the prefs came from is added if that project is known)
    //
    if (!global_prefs.host_specific && boinc_file_exists(GLOBAL_PREFS_FILE_NAME)) {
        FILE* fprefs = fopen(GLOBAL_PREFS_FILE_NAME, "r");
        if (fprefs) {
            copy_stream(fprefs, f);
            fclose(fprefs);
        }
        PROJECT* pp = lookup_project(global_prefs.source_project);
        if (pp && strlen(pp->email_hash)) {
            fprintf(f,
                "<global_prefs_source_email_hash>%s</global_prefs_source_email_hash>\n",
                pp->email_hash
            );
        }
    }

    // Of the projects with same email hash as this one,
    // send the oldest cross-project ID.
    // Use project URL as tie-breaker.
    //
    PROJECT* winner = p;
    for (i=0; i<projects.size(); i++ ) {
        PROJECT* project = projects[i];
        if (project == p) continue;
        if (strcmp(project->email_hash, p->email_hash)) continue;
        if (project->cpid_time < winner->cpid_time) {
            winner = project;
        } else if (project->cpid_time == winner->cpid_time) {
            if (strcmp(project->master_url, winner->master_url) < 0) {
                winner = project;
            }
        }
    }
    fprintf(f,
        "<cross_project_id>%s</cross_project_id>\n",
        winner->cross_project_id
    );

    // time and network stats; the transfer history is sent only
    // if a daily transfer period is configured
    //
    time_stats.write(mf, true);
    net_stats.write(mf);
    if (global_prefs.daily_xfer_period_days) {
        daily_xfer_history.write_scheduler_request(
            mf, global_prefs.daily_xfer_period_days
        );
    }

    // update hardware info, and write host info
    //
    host_info.get_host_info();
    set_ncpus();
    host_info.write(mf, !config.suppress_net_info, false);

    // get and write disk usage
    //
    get_disk_usages();
    get_disk_shares();
    fprintf(f,
        "    <disk_usage>\n"
        "        <d_boinc_used_total>%f</d_boinc_used_total>\n"
        "        <d_boinc_used_project>%f</d_boinc_used_project>\n"
        "        <d_project_share>%f</d_project_share>\n"
        "    </disk_usage>\n",
        total_disk_usage, p->disk_usage, p->disk_share
    );

    // copy request values from RSC_WORK_FETCH to COPROC
    // (only for GPU types whose resource index is > 0;
    // the estimated delay is zero when no seconds are requested)
    //
    int j = rsc_index(GPU_TYPE_NVIDIA);
    if (j > 0) {
        coprocs.nvidia.req_secs = rsc_work_fetch[j].req_secs;
        coprocs.nvidia.req_instances = rsc_work_fetch[j].req_instances;
        coprocs.nvidia.estimated_delay = rsc_work_fetch[j].req_secs?rsc_work_fetch[j].busy_time_estimator.get_busy_time():0;
    }
    j = rsc_index(GPU_TYPE_ATI);
    if (j > 0) {
        coprocs.ati.req_secs = rsc_work_fetch[j].req_secs;
        coprocs.ati.req_instances = rsc_work_fetch[j].req_instances;
        coprocs.ati.estimated_delay = rsc_work_fetch[j].req_secs?rsc_work_fetch[j].busy_time_estimator.get_busy_time():0;
    }

    // coprocessor info is written only if there is
    // more than one resource type
    //
    if (coprocs.n_rsc > 1) {
        coprocs.write_xml(mf, true);
    }

    // report completed jobs
    // If config.max_tasks_reported is nonzero, stop once that many
    // have been written and remember where we stopped, so that
    // unreported completed jobs past that point
    // can be listed as "other results" below.
    //
    // NOTE(review): last_reported_index == 0 doubles as
    // "limit not reached"; if the limit is reached at index 0,
    // later ready-to-report jobs are not listed as other results.
    //
    unsigned int last_reported_index = 0;
    p->nresults_returned = 0;
    for (i=0; i<results.size(); i++) {
        rp = results[i];
        if (rp->project == p && rp->ready_to_report) {
            p->nresults_returned++;
            rp->write(mf, true);
        }
        if (config.max_tasks_reported
            && (p->nresults_returned >= config.max_tasks_reported)
        ) {
            last_reported_index = i;
            break;
        }
    }

    read_trickle_files(p, f);

    // report sticky files as needed
    //
    for (i=0; i<file_infos.size(); i++) {
        FILE_INFO* fip = file_infos[i];
        if (fip->project != p) continue;
        if (!fip->sticky) continue;
        fprintf(f,
            "    <file_info>\n"
            "        <name>%s</name>\n"
            "        <nbytes>%f</nbytes>\n"
            "        <status>%d</status>\n"
            "    </file_info>\n",
            fip->name, fip->nbytes, fip->status
        );
    }

    // send log excerpts if the project asked for them;
    // buf is reused here for the job log filename
    //
    if (p->send_time_stats_log) {
        fprintf(f, "<time_stats_log>\n");
        time_stats.get_log_after(p->send_time_stats_log, mf);
        fprintf(f, "</time_stats_log>\n");
    }
    if (p->send_job_log) {
        fprintf(f, "<job_log>\n");
        job_log_filename(*p, buf, sizeof(buf));
        send_log_after(buf, p->send_job_log, mf);
        fprintf(f, "</job_log>\n");
    }

    // send descriptions of app versions
    // Each of this project's app versions gets a sequential index,
    // which the <other_result> entries below refer to.
    //
    fprintf(f, "<app_versions>\n");
    j=0;
    for (i=0; i<app_versions.size(); i++) {
        APP_VERSION* avp = app_versions[i];
        if (avp->project != p) continue;
        avp->write(mf, false);
        avp->index = j++;
    }
    fprintf(f, "</app_versions>\n");

    // send descriptions of jobs in progress for this project
    // (jobs not ready to report, plus any jobs past the point
    // where reporting stopped because of max_tasks_reported)
    //
    fprintf(f, "<other_results>\n");
    for (i=0; i<results.size(); i++) {
        rp = results[i];
        if (rp->project != p) continue;
        if ((last_reported_index && (i > last_reported_index)) || !rp->ready_to_report) {
            fprintf(f,
                "    <other_result>\n"
                "        <name>%s</name>\n"
                "        <app_version>%d</app_version>\n",
                rp->name,
                rp->avp->index
            );
            // the following is for backwards compatibility w/ old schedulers
            //
            if (strlen(rp->avp->plan_class)) {
                fprintf(f,
                    "        <plan_class>%s</plan_class>\n",
                    rp->avp->plan_class
                );
            }
            fprintf(f,
                "    </other_result>\n"
            );
        }
    }
    fprintf(f, "</other_results>\n");

    // if requested by project, send summary of all in-progress results
    // (for EDF simulation by scheduler)
    // This covers results of all projects, not just p;
    // results with no runtime remaining are skipped.
    //
    if (p->send_full_workload) {
        fprintf(f, "<in_progress_results>\n");
        for (i=0; i<results.size(); i++) {
            rp = results[i];
            double x = rp->estimated_runtime_remaining();
            if (x == 0) continue;
            // buf holds the optional GPU usage line for this result;
            // it stays empty for results with resource type 0
            // or a GPU type other than NVIDIA/ATI
            //
            strcpy(buf, "");
            int rt = rp->avp->gpu_usage.rsc_type;
            if (rt) {
                if (rt == rsc_index(GPU_TYPE_NVIDIA)) {
                    sprintf(buf, "        <ncudas>%f</ncudas>\n", rp->avp->gpu_usage.usage);
                } else if (rt == rsc_index(GPU_TYPE_ATI)) {
                    sprintf(buf, "        <natis>%f</natis>\n", rp->avp->gpu_usage.usage);
                }
            }
            fprintf(f,
                "    <ip_result>\n"
                "        <name>%s</name>\n"
                "        <report_deadline>%.0f</report_deadline>\n"
                "        <time_remaining>%.2f</time_remaining>\n"
                "        <avg_ncpus>%f</avg_ncpus>\n"
                "%s"
                "    </ip_result>\n",
                rp->name,
                rp->report_deadline,
                x,
                rp->avp->avg_ncpus,
                buf
            );
        }
        fprintf(f, "</in_progress_results>\n");
    }
    // if the client opaque file exists,
    // include its contents verbatim in a CDATA section
    //
    FILE* cof = boinc_fopen(CLIENT_OPAQUE_FILENAME, "r");
    if (cof) {
        fprintf(f, "<client_opaque>\n<![CDATA[\n");
        copy_stream(cof, f);
        fprintf(f, "\n]]>\n</client_opaque>\n");
        fclose(cof);
    }

    fprintf(f, "</scheduler_request>\n");

    fclose(f);
    return 0;
}
Example #20
0
// Handle the reply from a scheduler.
//
// Parses the reply file for "project" (see get_sched_reply_filename()),
// then applies it to client state in this order:
// - log the outcome; sanity-check the master URL and project name
// - show server messages; return ERR_PROJECT_DOWN if the project is down
// - save and reread global prefs, pick up a new venue,
//   rewrite and re-parse the account file if project prefs
//   or GUI URLs changed
// - accept or verify a new code-signing key
// - merge new apps, file infos, app versions, workunits and results
// - process result acks and abort requests
// - copy per-project flags, host ID and project files
// - set next_rpc_time, hand off to work fetch,
//   reset the RPC backoff, and garbage collect
//
// project: the project whose scheduler was contacted
// scheduler_url: URL of that scheduler; passed to save_global_prefs()
//
// Returns 0 on success, ERR_FOPEN if the reply file can't be opened,
// ERR_PROJECT_DOWN if the reply says the project is down,
// or the error code from parsing the reply, saving global prefs,
// or writing the account file.
//
// All string copies into fixed-size buffers use safe_strcpy(),
// so an over-long value is truncated rather than overflowing.
//
int CLIENT_STATE::handle_scheduler_reply(
    PROJECT* project, char* scheduler_url
) {
    SCHEDULER_REPLY sr;
    FILE* f;
    int retval;
    unsigned int i;
    bool signature_valid, update_global_prefs=false, update_project_prefs=false;
    char buf[1024], filename[256];
    std::string old_gui_urls = project->gui_urls;
        // snapshot taken before parsing:
        // SCHEDULER_REPLY::parse() may overwrite project->gui_urls
    PROJECT* p2;
    vector<RESULT*>new_results;

    project->last_rpc_time = now;

    if (requested_work()) {
        had_or_requested_work = true;
    }

    get_sched_reply_filename(*project, filename, sizeof(filename));

    f = fopen(filename, "r");
    if (!f) return ERR_FOPEN;
    retval = sr.parse(f, project);
    fclose(f);
    if (retval) return retval;

    if (log_flags.sched_ops) {
        if (requested_work()) {
            sprintf(buf, ": got %d new tasks", (int)sr.results.size());
        } else {
            strcpy(buf, "");
        }
        msg_printf(project, MSG_INFO, "Scheduler request completed%s", buf);
    }
    if (log_flags.sched_op_debug) {
        if (sr.scheduler_version) {
            msg_printf(project, MSG_INFO,
                "[sched_op] Server version %d",
                sr.scheduler_version
            );
        }
    }

    // check that master URL is correct
    // (comparison is case-insensitive; we only warn the user)
    //
    if (strlen(sr.master_url)) {
        canonicalize_master_url(sr.master_url);
        string url1 = sr.master_url;
        string url2 = project->master_url;
        downcase_string(url1);
        downcase_string(url2);
        if (url1 != url2) {
            p2 = lookup_project(sr.master_url);
            if (p2) {
                msg_printf(project, MSG_USER_ALERT,
                    "You are attached to this project twice.  Please remove projects named %s, then add %s",
                    project->project_name,
                    sr.master_url
                );
            } else {
                msg_printf(project, MSG_INFO,
                    _("You used the wrong URL for this project.  When convenient, remove this project, then add %s"),
                    sr.master_url
                );
            }
        }
    }

    // make sure we don't already have a project of same name
    //
    bool dup_name = false;
    for (i=0; i<projects.size(); i++) {
        p2 = projects[i];
        if (project == p2) continue;
        if (!strcmp(p2->project_name, project->project_name)) {
            dup_name = true;
            break;
        }
    }
    if (dup_name) {
        msg_printf(project, MSG_INFO,
            "Already attached to a project named %s (possibly with wrong URL)",
            project->project_name
        );
        msg_printf(project, MSG_INFO,
            "Consider detaching this project, then trying again"
        );
    }

    // show messages from server
    // (priority "notice" is shown as a scheduler alert, anything else as info)
    //
    for (i=0; i<sr.messages.size(); i++) {
        USER_MESSAGE& um = sr.messages[i];
        int prio = (!strcmp(um.priority.c_str(), "notice"))?MSG_SCHEDULER_ALERT:MSG_INFO;
        string_substitute(um.message.c_str(), buf, sizeof(buf), "%", "%%");
        msg_printf(project, prio, "%s", buf);
    }

    if (log_flags.sched_op_debug && sr.request_delay) {
        msg_printf(project, MSG_INFO,
            "Project requested delay of %.0f seconds", sr.request_delay
        );
    }

    // if project is down, return error (so that we back off)
    // and don't do anything else
    //
    if (sr.project_is_down) {
        if (sr.request_delay) {
            double x = now + sr.request_delay;
            project->set_min_rpc_time(x, "project is down");
        }
        return ERR_PROJECT_DOWN;
    }

    // if the scheduler reply includes global preferences,
    // insert extra elements, write to disk, and parse
    //
    if (sr.global_prefs_xml) {
        // skip this if we have host-specific prefs
        // and we're talking to an old scheduler
        //
        if (!global_prefs.host_specific || sr.scheduler_version >= 507) {
            retval = save_global_prefs(
                sr.global_prefs_xml, project->master_url, scheduler_url
            );
            if (retval) {
                return retval;
            }
            update_global_prefs = true;
        } else {
            if (log_flags.sched_op_debug) {
                msg_printf(project, MSG_INFO,
                    "ignoring prefs from old server; we have host-specific prefs"
                );
            }
        }
    }

    // see if we have a new venue from this project
    // (this must go AFTER the above, since otherwise
    // global_prefs_source_project() is meaningless)
    //
    if (strcmp(project->host_venue, sr.host_venue)) {
        safe_strcpy(project->host_venue, sr.host_venue);
        msg_printf(project, MSG_INFO, "New computer location: %s", sr.host_venue);
        update_project_prefs = true;
        if (project == global_prefs_source_project()) {
            safe_strcpy(main_host_venue, sr.host_venue);
            update_global_prefs = true;
        }
    }

    if (update_global_prefs) {
        read_global_prefs();
    }

    // deal with project preferences (should always be there)
    // If they've changed, write to account file,
    // then parse to get our venue, and pass to running apps
    //
    if (sr.project_prefs_xml) {
        if (strcmp(project->project_prefs.c_str(), sr.project_prefs_xml)) {
            project->project_prefs = string(sr.project_prefs_xml);
            update_project_prefs = true;
        }
    }

    // the account file has GUI URLs and project prefs.
    // rewrite if either of these has changed
    //
    if (project->gui_urls != old_gui_urls || update_project_prefs) {
        retval = project->write_account_file();
        if (retval) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Can't write account file: %s", boincerror(retval)
            );
            return retval;
        }
    }

    if (update_project_prefs) {
        project->parse_account_file();
        if (strlen(project->host_venue)) {
            project->parse_account_file_venue();
        }
        project->parse_preferences_for_user_files();
        active_tasks.request_reread_prefs(project);
    }

    // if the scheduler reply includes a code-signing key,
    // accept it if we don't already have one from the project.
    // Otherwise verify its signature, using the key we already have.
    //

    if (sr.code_sign_key) {
        if (!strlen(project->code_sign_key)) {
            safe_strcpy(project->code_sign_key, sr.code_sign_key);
        } else {
            if (sr.code_sign_key_signature) {
                retval = check_string_signature2(
                    sr.code_sign_key, sr.code_sign_key_signature,
                    project->code_sign_key, signature_valid
                );
                // signature_valid is only read if the check itself succeeded
                //
                if (!retval && signature_valid) {
                    safe_strcpy(project->code_sign_key, sr.code_sign_key);
                } else {
                    msg_printf(project, MSG_INTERNAL_ERROR,
                        "New code signing key doesn't validate"
                    );
                }
            } else {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Missing code sign key signature"
                );
            }
        }
    }

    // copy new entities to client state
    //
    // apps: refresh the friendly name of known apps, link and add new ones
    //
    for (i=0; i<sr.apps.size(); i++) {
        APP* app = lookup_app(project, sr.apps[i].name);
        if (app) {
            safe_strcpy(app->user_friendly_name, sr.apps[i].user_friendly_name);
        } else {
            app = new APP;
            *app = sr.apps[i];
            retval = link_app(project, app);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't handle application %s in scheduler reply", app->name
                );
                delete app;
            } else {
                apps.push_back(app);
            }
        }
    }

    // file infos: merge into known ones, link and add new ones
    //
    FILE_INFO* fip;
    for (i=0; i<sr.file_infos.size(); i++) {
        fip = lookup_file_info(project, sr.file_infos[i].name);
        if (fip) {
            fip->merge_info(sr.file_infos[i]);
        } else {
            fip = new FILE_INFO;
            *fip = sr.file_infos[i];
            retval = link_file_info(project, fip);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't handle file %s in scheduler reply", fip->name
                );
                delete fip;
            } else {
                file_infos.push_back(fip);
            }
        }
    }

    // file delete requests: just clear the sticky flag;
    // the file itself is not removed here
    //
    for (i=0; i<sr.file_deletes.size(); i++) {
        fip = lookup_file_info(project, sr.file_deletes[i].c_str());
        if (fip) {
            if (log_flags.file_xfer_debug) {
                msg_printf(project, MSG_INFO,
                    "[file_xfer_debug] Got server request to delete file %s",
                    fip->name
                );
            }
            fip->sticky = false;
        }
    }

    // app versions: ignored for anonymous-platform projects;
    // otherwise update known versions in place and add new ones
    //
    for (i=0; i<sr.app_versions.size(); i++) {
        if (project->anonymous_platform) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "App version returned from anonymous platform project; ignoring"
            );
            continue;
        }
        APP_VERSION& avpp = sr.app_versions[i];
        if (strlen(avpp.platform) == 0) {
            // no platform given: assume our primary platform
            //
            safe_strcpy(avpp.platform, get_primary_platform());
        } else {
            if (!is_supported_platform(avpp.platform)) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "App version has unsupported platform %s", avpp.platform
                );
                continue;
            }
        }
        if (avpp.missing_coproc) {
            // warn only; the version is still recorded,
            // and jobs that use it are aborted below
            //
            msg_printf(project, MSG_INTERNAL_ERROR,
                "App version uses non-existent %s GPU",
                avpp.missing_coproc_name
            );
        }
        APP* app = lookup_app(project, avpp.app_name);
        if (!app) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Missing app %s", avpp.app_name
            );
            continue;
        }
        APP_VERSION* avp = lookup_app_version(
            app, avpp.platform, avpp.version_num, avpp.plan_class
        );
        if (avp) {
            // update performance-related info;
            // generally this shouldn't change,
            // but if it does it's better to use the new stuff
            //
            avp->avg_ncpus = avpp.avg_ncpus;
            avp->max_ncpus = avpp.max_ncpus;
            avp->flops = avpp.flops;
            safe_strcpy(avp->cmdline, avpp.cmdline);
            avp->gpu_usage = avpp.gpu_usage;
            strlcpy(avp->api_version, avpp.api_version, sizeof(avp->api_version));
            avp->dont_throttle = avpp.dont_throttle;
            avp->needs_network = avpp.needs_network;

            // if we had download failures, clear them
            //
            avp->clear_errors();
            continue;
        }
        avp = new APP_VERSION;
        *avp = avpp;
        retval = link_app_version(project, avp);
        if (retval) {
            delete avp;
            continue;
        }
        app_versions.push_back(avp);
    }

    // workunits: skip ones we already have
    //
    for (i=0; i<sr.workunits.size(); i++) {
        if (lookup_workunit(project, sr.workunits[i].name)) continue;
        WORKUNIT* wup = new WORKUNIT;
        *wup = sr.workunits[i];
        wup->project = project;
        retval = link_workunit(project, wup);
        if (retval) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Can't handle task %s in scheduler reply", wup->name
            );
            delete wup;
            continue;
        }
        wup->clear_errors();
        workunits.push_back(wup);
    }

    // results (jobs): add new ones, and total up
    // the estimated runtime per resource for the debug message below.
    // Index 0 is the CPU; GPU jobs are counted under their own rsc_type.
    //
    double est_rsc_runtime[MAX_RSC];
    for (int j=0; j<coprocs.n_rsc; j++) {
        est_rsc_runtime[j] = 0;
    }
    for (i=0; i<sr.results.size(); i++) {
        if (lookup_result(project, sr.results[i].name)) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Already have task %s\n", sr.results[i].name
            );
            continue;
        }
        RESULT* rp = new RESULT;
        *rp = sr.results[i];
        retval = link_result(project, rp);
        if (retval) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Can't handle task %s in scheduler reply", rp->name
            );
            delete rp;
            continue;
        }
        if (strlen(rp->platform) == 0) {
            // no platform given: use our primary platform
            // and the latest version we have for it
            //
            safe_strcpy(rp->platform, get_primary_platform());
            rp->version_num = latest_version(rp->wup->app, rp->platform);
        }
        rp->avp = lookup_app_version(
            rp->wup->app, rp->platform, rp->version_num, rp->plan_class
        );
        if (!rp->avp) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "No app version found for app %s platform %s ver %d class %s; discarding %s",
                rp->wup->app->name, rp->platform, rp->version_num, rp->plan_class, rp->name
            );
            delete rp;
            continue;
        }
        if (rp->avp->missing_coproc) {
            // the job is aborted but still added to the result list below
            //
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Missing coprocessor for task %s; aborting", rp->name
            );
            rp->abort_inactive(EXIT_MISSING_COPROC);
        } else {
            rp->set_state(RESULT_NEW, "handle_scheduler_reply");
            int rt = rp->avp->gpu_usage.rsc_type;
            if (rt > 0) {
                est_rsc_runtime[rt] += rp->estimated_runtime();
                gpus_usable = true;
                    // trigger a check of whether GPU is actually usable
            } else {
                est_rsc_runtime[0] += rp->estimated_runtime();
            }
        }
        rp->wup->version_num = rp->version_num;
        rp->received_time = now;
        new_results.push_back(rp);
        results.push_back(rp);
    }
    sort_results();

    if (log_flags.sched_op_debug) {
        if (sr.results.size()) {
            for (int j=0; j<coprocs.n_rsc; j++) {
                msg_printf(project, MSG_INFO,
                    "[sched_op] estimated total %s task duration: %.0f seconds",
                    rsc_name(j),
                    est_rsc_runtime[j]/time_stats.availability_frac(j)
                );
            }
        }
    }

    // update records for ack'ed results
    //
    for (i=0; i<sr.result_acks.size(); i++) {
        if (log_flags.sched_op_debug) {
            msg_printf(project, MSG_INFO,
                "[sched_op] handle_scheduler_reply(): got ack for task %s\n",
                sr.result_acks[i].name
            );
        }
        RESULT* rp = lookup_result(project, sr.result_acks[i].name);
        if (rp) {
            rp->got_server_ack = true;
        } else {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Got ack for task %s, but can't find it", sr.result_acks[i].name
            );
        }
    }

    // handle result abort requests:
    // abort via the active task if there is one,
    // otherwise abort the result directly
    //
    for (i=0; i<sr.result_abort.size(); i++) {
        RESULT* rp = lookup_result(project, sr.result_abort[i].name);
        if (rp) {
            ACTIVE_TASK* atp = lookup_active_task_by_result(rp);
            if (atp) {
                atp->abort_task(EXIT_ABORTED_BY_PROJECT,
                    "aborted by project - no longer usable"
                );
            } else {
                rp->abort_inactive(EXIT_ABORTED_BY_PROJECT);
            }
        } else {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Server requested abort of unknown task %s",
                sr.result_abort[i].name
            );
        }
    }

    // conditional aborts: only applied to results flagged not_started
    //
    for (i=0; i<sr.result_abort_if_not_started.size(); i++) {
        RESULT* rp = lookup_result(project, sr.result_abort_if_not_started[i].name);
        if (!rp) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Server requested conditional abort of unknown task %s",
                sr.result_abort_if_not_started[i].name
            );
            continue;
        }
        if (rp->not_started) {
            rp->abort_inactive(EXIT_ABORTED_BY_PROJECT);
        }
    }

    // remove acked trickle files
    //
    if (sr.message_ack) {
        remove_trickle_files(project);
    }

    // send_full_workload is only ever turned on here;
    // the other flags track the reply directly
    //
    if (sr.send_full_workload) {
        project->send_full_workload = true;
    }
    project->dont_use_dcf = sr.dont_use_dcf;
    project->send_time_stats_log = sr.send_time_stats_log;
    project->send_job_log = sr.send_job_log;
    project->trickle_up_pending = false;

    // The project returns a hostid only if it has created a new host record.
    // In that case reset RPC seqno
    //
    if (sr.hostid) {
        if (project->hostid) {
            // if we already have a host ID for this project,
            // we must have sent it a stale seqno,
            // which usually means our state file was copied from another host.
            // So generate a new host CPID.
            //
            generate_new_host_cpid();
            msg_printf(project, MSG_INFO,
                "Generated new computer cross-project ID: %s",
                host_info.host_cpid
            );
        }
        //msg_printf(project, MSG_INFO, "Changing host ID from %d to %d", project->hostid, sr.hostid);
        project->hostid = sr.hostid;
        project->rpc_seqno = 0;
    }

#ifdef ENABLE_AUTO_UPDATE
    if (sr.auto_update.present) {
        if (!sr.auto_update.validate_and_link(project)) {
            auto_update = sr.auto_update;
        }
    }
#endif

    project->project_files = sr.project_files;
    project->link_project_files();
    project->create_project_file_symlinks();

    if (log_flags.state_debug) {
        msg_printf(project, MSG_INFO,
            "[state] handle_scheduler_reply(): State after handle_scheduler_reply():"
        );
        print_summary();
    }

    // the following must precede the backoff and request_delay checks,
    // since it overrides them
    //
    if (sr.next_rpc_delay) {
        project->next_rpc_time = now + sr.next_rpc_delay;
    } else {
        project->next_rpc_time = 0;
    }

    work_fetch.handle_reply(project, &sr, new_results);

    // the RPC succeeded: clear the failure count and backoff,
    // then apply any delay the project asked for
    //
    project->nrpc_failures = 0;
    project->min_rpc_time = 0;

    if (sr.request_delay) {
        double x = now + sr.request_delay;
        project->set_min_rpc_time(x, "requested by project");
    }

    if (sr.got_rss_feeds) {
        handle_sr_feeds(sr.sr_feeds, project);
    }

    update_trickle_up_urls(project, sr.trickle_up_urls);

    // garbage collect in case the project sent us some irrelevant FILE_INFOs;
    // avoid starting transfers for them
    //
    gstate.garbage_collect_always();

    return 0;
}
Example #21
0
// parse a scheduler reply.
// Some of the items go into the SCHEDULER_REPLY object.
// Others are copied straight to the PROJECT
//
// in: the open reply file
// project: the project the reply came from;
//     several of its fields are overwritten directly while parsing,
//     and its statistics file is rewritten when the close tag is seen
//
// Returns 0 once </scheduler_reply> has been seen.
// Returns ERR_XML_PARSE if the start or close tag is missing,
// or if the code-sign key or its signature can't be extracted.
// Returns the underlying error if global prefs, project prefs
// or GUI URLs can't be extracted.
// Failures to parse individual apps, files, app versions, workunits,
// results, acks, aborts, trickle-downs and project files
// are logged and otherwise ignored.
//
int SCHEDULER_REPLY::parse(FILE* in, PROJECT* project) {
    char buf[256], msg_buf[1024], pri_buf[256], attr_buf[256];
    int retval;
    MIOFILE mf;
    XML_PARSER xp(&mf);
    std::string delete_file_name;

    mf.init_file(in);
    bool found_start_tag = false, btemp;
    double cpid_time = 0;

    clear();
    safe_strcpy(host_venue, project->host_venue);
        // the project won't send us a venue if it's doing maintenance
        // or doesn't check the DB because no work.
        // Don't overwrite the host venue in that case.
    sr_feeds.clear();
    trickle_up_urls.clear();

    // start from "project has apps for every resource";
    // the no_*_apps elements below flip individual entries
    //
    if (!project->anonymous_platform) {
        for (int i=0; i<MAX_RSC; i++) {
            project->no_rsc_apps[i] = false;
        }
    }

    // First line should either be tag (HTTP 1.0) or
    // hex length of response (HTTP 1.1)
    //
    while (!xp.get_tag(attr_buf, sizeof(attr_buf))) {
        // skip everything up to the start tag
        //
        if (!found_start_tag) {
            if (xp.match_tag("scheduler_reply")) {
                found_start_tag = true;
            }
            continue;
        }
        if (xp.match_tag("/scheduler_reply")) {

            // update statistics after parsing the scheduler reply
            // add new record if vector is empty or we have a new day
            //
            if (project->statistics.empty() || project->statistics.back().day!=dday()) {
                project->trim_statistics();
                DAILY_STATS nds;
                project->statistics.push_back(nds);
            }
            DAILY_STATS& ds = project->statistics.back();
            ds.day=dday();
            ds.user_total_credit=project->user_total_credit;
            ds.user_expavg_credit=project->user_expavg_credit;
            ds.host_total_credit=project->host_total_credit;
            ds.host_expavg_credit=project->host_expavg_credit;

            project->write_statistics_file();

            // if the reply had no cpid_time, fall back to user create time
            //
            if (cpid_time) {
                project->cpid_time = cpid_time;
            } else {
                project->cpid_time = project->user_create_time;
            }

            // NOTE(review): this tests the project's existing flag,
            // not the dont_use_dcf member parsed from this reply
            // (the caller copies that into the project
            // after parse() returns).
            // Confirm that the one-RPC lag is intended.
            //
            if (project->dont_use_dcf) {
                project->duration_correction_factor = 1;
            }
            return 0;
        }
        else if (xp.parse_str("project_name", project->project_name, sizeof(project->project_name))) {
            continue;
        }
        else if (xp.parse_str("master_url", master_url, sizeof(master_url))) {
            continue;
        }
        else if (xp.parse_str("symstore", project->symstore, sizeof(project->symstore))) continue;
        else if (xp.parse_str("user_name", project->user_name, sizeof(project->user_name))) continue;
        else if (xp.parse_double("user_total_credit", project->user_total_credit)) continue;
        else if (xp.parse_double("user_expavg_credit", project->user_expavg_credit)) continue;
        else if (xp.parse_double("user_create_time", project->user_create_time)) continue;
        else if (xp.parse_double("cpid_time", cpid_time)) continue;
        else if (xp.parse_str("team_name", project->team_name, sizeof(project->team_name))) continue;
        else if (xp.parse_int("hostid", hostid)) continue;
        else if (xp.parse_double("host_total_credit", project->host_total_credit)) continue;
        else if (xp.parse_double("host_expavg_credit", project->host_expavg_credit)) continue;
        else if (xp.parse_str("host_venue", host_venue, sizeof(host_venue))) continue;
        else if (xp.parse_double("host_create_time", project->host_create_time)) continue;
        else if (xp.parse_double("request_delay", request_delay)) continue;
        else if (xp.parse_double("next_rpc_delay", next_rpc_delay)) continue;
        else if (xp.match_tag("global_preferences")) {
            retval = dup_element_contents(
                xp.f->f,
                "</global_preferences>",
                &global_prefs_xml
            );
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse global prefs in scheduler reply: %s",
                    boincerror(retval)
                );
                return retval;
            }
        } else if (xp.match_tag("project_preferences")) {
            retval = dup_element_contents(
                xp.f->f,
                "</project_preferences>",
                &project_prefs_xml
            );
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse project prefs in scheduler reply: %s",
                    boincerror(retval)
                );
                return retval;
            }
        } else if (xp.match_tag("gui_urls")) {
            std::string foo;
            retval = copy_element_contents(xp.f->f, "</gui_urls>", foo);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse GUI URLs in scheduler reply: %s",
                    boincerror(retval)
                );
                return retval;
            }
            // store with the enclosing tags put back
            //
            project->gui_urls = "<gui_urls>\n"+foo+"</gui_urls>\n";
        } else if (xp.match_tag("code_sign_key")) {
            retval = dup_element_contents(
                xp.f->f,
                "</code_sign_key>",
                &code_sign_key
            );
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse code sign key in scheduler reply: %s",
                    boincerror(retval)
                );
                return ERR_XML_PARSE;
            }
            strip_whitespace(code_sign_key);
        } else if (xp.match_tag("code_sign_key_signature")) {
            retval = dup_element_contents(
                xp.f->f,
                "</code_sign_key_signature>",
                &code_sign_key_signature
            );
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse code sign key signature in scheduler reply: %s",
                    boincerror(retval)
                );
                return ERR_XML_PARSE;
            }
        } else if (xp.match_tag("app")) {
            APP app;
            retval = app.parse(xp);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse application in scheduler reply: %s",
                    boincerror(retval)
                );
            } else {
                apps.push_back(app);
            }
        } else if (xp.match_tag("file_info")) {
            FILE_INFO file_info;
            retval = file_info.parse(xp);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse file info in scheduler reply: %s",
                    boincerror(retval)
                );
            } else {
                file_infos.push_back(file_info);
            }
        } else if (xp.match_tag("app_version")) {
            APP_VERSION av;
            retval = av.parse(xp);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse application version in scheduler reply: %s",
                    boincerror(retval)
                );
            } else {
                app_versions.push_back(av);
            }
        } else if (xp.match_tag("workunit")) {
            WORKUNIT wu;
            retval = wu.parse(xp);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse workunit in scheduler reply: %s",
                    boincerror(retval)
                );
            } else {
                workunits.push_back(wu);
            }
        } else if (xp.match_tag("result")) {
            RESULT result;      // make sure this is here so constructor
                                // gets called each time
            retval = result.parse_server(xp);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse task in scheduler reply: %s",
                    boincerror(retval)
                );
            } else {
                results.push_back(result);
            }
        } else if (xp.match_tag("result_ack")) {
            RESULT result;
            retval = result.parse_name(xp, "/result_ack");
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse ack in scheduler reply: %s",
                    boincerror(retval)
                );
            } else {
                result_acks.push_back(result);
            }
        } else if (xp.match_tag("result_abort")) {
            RESULT result;
            retval = result.parse_name(xp, "/result_abort");
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse result abort in scheduler reply: %s",
                    boincerror(retval)
                );
            } else {
                result_abort.push_back(result);
            }
        } else if (xp.match_tag("result_abort_if_not_started")) {
            RESULT result;
            retval = result.parse_name(xp, "/result_abort_if_not_started");
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse result abort-if-not-started in scheduler reply: %s",
                    boincerror(retval)
                );
            } else {
                result_abort_if_not_started.push_back(result);
            }
        } else if (xp.parse_string("delete_file_info", delete_file_name)) {
            file_deletes.push_back(delete_file_name);
        } else if (xp.parse_str("message", msg_buf, sizeof(msg_buf))) {
            // the priority is an attribute of the <message> tag,
            // so it comes from the attribute buffer filled by get_tag()
            //
            parse_attr(attr_buf, "priority", pri_buf, sizeof(pri_buf));
            USER_MESSAGE um(msg_buf, pri_buf);
            messages.push_back(um);
            continue;
        } else if (xp.parse_bool("message_ack", message_ack)) {
            continue;
        } else if (xp.parse_bool("project_is_down", project_is_down)) {
            continue;
        } else if (xp.parse_str("email_hash", project->email_hash, sizeof(project->email_hash))) {
            continue;
        } else if (xp.parse_str("cross_project_id", project->cross_project_id, sizeof(project->cross_project_id))) {
            continue;
        } else if (xp.parse_str("external_cpid", project->external_cpid, sizeof(project->external_cpid))) {
            continue;
        } else if (xp.match_tag("trickle_down")) {
            retval = gstate.handle_trickle_down(project, in);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "handle_trickle_down failed: %s", boincerror(retval)
                );
            }
            continue;
        } else if (xp.parse_bool("non_cpu_intensive", project->non_cpu_intensive)) {
            continue;
        } else if (xp.parse_bool("ended", project->ended)) {
            continue;
        } else if (xp.parse_bool("no_cpu_apps", btemp)) {
            if (!project->anonymous_platform) {
                handle_no_rsc_apps("CPU", project, btemp);
            }
            continue;

        // deprecated syntax
        } else if (xp.parse_bool("no_cuda_apps", btemp)) {
            if (!project->anonymous_platform) {
                handle_no_rsc_apps(GPU_TYPE_NVIDIA, project, btemp);
            }
            continue;
        } else if (xp.parse_bool("no_ati_apps", btemp)) {
            if (!project->anonymous_platform) {
                handle_no_rsc_apps(GPU_TYPE_ATI, project, btemp);
            }
            continue;

        } else if (xp.parse_str("no_rsc_apps", buf, sizeof(buf))) {
            if (!project->anonymous_platform) {
                handle_no_rsc_apps(buf, project, true);
            }
            continue;
        } else if (xp.parse_bool("verify_files_on_app_start", project->verify_files_on_app_start)) {
            continue;
        } else if (xp.parse_bool("send_full_workload", send_full_workload)) {
            continue;
        } else if (xp.parse_bool("dont_use_dcf", dont_use_dcf)) {
            continue;
        } else if (xp.parse_int("send_time_stats_log", send_time_stats_log)){
            continue;
        } else if (xp.parse_int("send_job_log", send_job_log)) {
            continue;
        } else if (xp.parse_int("scheduler_version", scheduler_version)) {
            continue;
        } else if (xp.match_tag("project_files")) {
            // log a failure like the other element parsers do,
            // rather than dropping it silently; keep parsing either way
            //
            retval = parse_project_files(xp, project_files);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't parse project files in scheduler reply: %s",
                    boincerror(retval)
                );
            }
#ifdef ENABLE_AUTO_UPDATE
        } else if (xp.match_tag("auto_update")) {
            retval = auto_update.parse(xp);
            if (!retval) auto_update.present = true;
#endif
        } else if (xp.match_tag("rss_feeds")) {
            got_rss_feeds = true;
            parse_rss_feed_descs(xp, sr_feeds);
            continue;
        } else if (xp.match_tag("trickle_up_urls")) {
            parse_trickle_up_urls(xp, trickle_up_urls);
            continue;
        } else if (xp.parse_int("userid", project->userid)) {
            continue;
        } else if (xp.parse_int("teamid", project->teamid)) {
            continue;
        } else if (xp.parse_double("desired_disk_usage", project->desired_disk_usage)) {
            continue;
        } else {
            if (log_flags.unparsed_xml) {
                msg_printf(project, MSG_INFO,
                    "[unparsed_xml] SCHEDULER_REPLY::parse(): unrecognized %s\n",
                    xp.parsed_tag
                );
            }
        }
    }

    // we ran out of input without seeing </scheduler_reply>
    //
    if (found_start_tag) {
        msg_printf(project, MSG_INTERNAL_ERROR, "No close tag in scheduler reply");
    } else {
        msg_printf(project, MSG_INTERNAL_ERROR, "No start tag in scheduler reply");
    }

    return ERR_XML_PARSE;
}
Example #22
0
// Called when a task's application has finished.
// In the non-SIM build, check each output file (absent? too big?),
// delete the ones that are neither uploadable nor sticky,
// and gzip/checksum the rest.
// Input files are left alone because other WUs might share them.
// Then set the result's state and update per-project job counters.
// Always returns 0.
//
int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
    RESULT* rp = at.result;
    bool had_error = false;

#ifndef SIM
    // Skip the output file scan if the result was aborted
    // via GUI or by the project
    //
    bool skip_scan = (rp->exit_status == EXIT_ABORTED_VIA_GUI)
        || (rp->exit_status == EXIT_ABORTED_BY_PROJECT);
    if (!skip_scan) {
        char path[MAXPATHLEN];
        for (unsigned int k=0; k<rp->output_files.size(); k++) {
            FILE_REF& fref = rp->output_files[k];
            FILE_INFO* fip = fref.file_info;
            if (fip->uploaded) continue;

            get_pathname(fip, path, sizeof(path));
            double size;
            int retval = file_size(path, size);

            if (retval) {
                // can't get the file's size.
                // That's OK for an optional file: just don't upload it.
                //
                if (fref.optional) {
                    fip->upload_urls.clear();
                    continue;
                }

                // a required output file is missing
                //
                fip->status = retval;
                had_error = true;
                msg_printf(
                    rp->project, MSG_INFO,
                    "Output file %s for task %s absent",
                    fip->name, rp->name
                );
                continue;
            }

            if (size > fip->max_nbytes) {
                // The per-file size limit is enforced only here,
                // when the app has finished.
                // Total disk usage is checked while the app runs.
                //
                msg_printf(
                    rp->project, MSG_INFO,
                    "Output file %s for task %s exceeds size limit.",
                    fip->name, rp->name
                );
                msg_printf(
                    rp->project, MSG_INFO,
                    "File size: %f bytes.  Limit: %f bytes",
                    size, fip->max_nbytes
                );

                fip->delete_file();
                fip->status = ERR_FILE_TOO_BIG;
                had_error = true;
                continue;
            }

            if (!fip->uploadable() && !fip->sticky) {
                // nothing will use this file; get rid of it
                // (delete_file() sets status to NOT_PRESENT)
                //
                fip->delete_file();
                continue;
            }

            // file is to be kept: compress if requested, then checksum
            //
            retval = 0;
            if (fip->gzip_when_done) {
                retval = fip->gzip();
            }
            if (!retval) {
                retval = md5_file(path, fip->md5_cksum, fip->nbytes);
            }
            if (retval) {
                fip->status = retval;
                had_error = true;
            } else {
                fip->status = FILE_PRESENT;
            }
        }
    }
#endif

    // a nonzero exit status counts as an error
    // even if all output files are OK
    //
    if (rp->exit_status) {
        had_error = true;
    }

    if (!had_error) {
#ifdef SIM
        rp->set_state(RESULT_FILES_UPLOADED, "CS::app_finished");
        rp->set_ready_to_report();
        rp->completed_time = now;
#else
        rp->set_state(RESULT_FILES_UPLOADING, "CS::app_finished");
        rp->append_log_record();
#endif
        rp->project->update_duration_correction_factor(&at);
        rp->project->njobs_success++;
    } else {
        bool aborted = (rp->exit_status == EXIT_ABORTED_VIA_GUI)
            || (rp->exit_status == EXIT_ABORTED_BY_PROJECT);
        if (aborted) {
            rp->set_state(RESULT_ABORTED, "CS::app_finished");
        } else {
            rp->set_state(RESULT_COMPUTE_ERROR, "CS::app_finished");
        }
        rp->project->njobs_error++;
    }

    // account for the time since the start of the current REC interval
    //
    work_fetch.accumulate_inst_sec(&at, now - rec_interval_start);

    // allow work fetch if there's an idle instance,
    // even before the upload finishes
    //
    rp->project->pwf.request_if_idle_and_uploading = true;

    return 0;
}
Example #23
0
// Parse the client state file "fname" and populate this CLIENT_STATE:
// projects, apps, file infos, app versions, workunits, results,
// and a number of host-wide settings.
//
// Per-project elements (app, file_info, app_version, workunit, result,
// project_files) are attached to "project", which is set by the most recent
// <project> element that parsed OK.
// In the non-SIM build it is NULL if lookup_project() didn't find
// that project, in which case the elements that follow are
// reported as being outside a project and discarded.
// An element that fails to parse or link is logged and discarded;
// parsing then continues with the next element.
//
// Returns ERR_FOPEN if the file can't be opened, otherwise 0
// (even if individual elements were discarded).
//
int CLIENT_STATE::parse_state_file_aux(const char* fname) {
    PROJECT *project=NULL;
    int retval=0;
    string stemp;

    FILE* f = fopen(fname, "r");
    if (!f) return ERR_FOPEN;
    MIOFILE mf;
    XML_PARSER xp(&mf);
    mf.init_file(f);

    // loop until get_tag() returns nonzero or we see the closing tag
    //
    while (!xp.get_tag()) {
        if (xp.match_tag("/client_state")) {
            break;
        }
        if (xp.match_tag("client_state")) {
            continue;
        }
        if (xp.match_tag("project")) {
            // parse into a temporary; if parsing fails,
            // "project" keeps its previous value
            //
            PROJECT temp_project;
            retval = temp_project.parse_state(xp);
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR, "Can't parse project in state file");
            } else {
#ifdef SIM
                // simulator: create a new PROJECT from the parsed one
                //
                project = new PROJECT;
                *project = temp_project;
                projects.push_back(project);
#else
                // client: look up the existing project by master URL
                // and copy the state-file fields into it.
                // If there's none, "project" becomes NULL.
                //
                project = lookup_project(temp_project.master_url);
                if (project) {
                    project->copy_state_fields(temp_project);
                } else {
                    msg_printf(&temp_project, MSG_INTERNAL_ERROR,
                        "Project %s is in state file but no account file found",
                        temp_project.get_project_name()
                    );
                }
#endif
            }
            continue;
        }
        if (xp.match_tag("app")) {
            // the element is always parsed (consuming it from the input)
            // before deciding whether to keep it
            //
            APP* app = new APP;
            retval = app->parse(xp);
            if (!project) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Application %s outside project in state file",
                    app->name
                );
                delete app;
                continue;
            }
            // for anonymous-platform projects, apps in the state file
            // are silently dropped
            // (presumably they're defined elsewhere - TODO confirm)
            //
            if (project->anonymous_platform) {
                delete app;
                continue;
            }
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse application in state file"
                );
                delete app;
                continue;
            }
            retval = link_app(project, app);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't handle application %s in state file",
                    app->name
                );
                delete app;
                continue;
            }
            apps.push_back(app);
            continue;
        }
        if (xp.match_tag("file_info") || xp.match_tag("file")) {
            FILE_INFO* fip = new FILE_INFO;
            retval = fip->parse(xp);
            if (!project) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "File info outside project in state file"
                );
                delete fip;
                continue;
            }
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't handle file info in state file"
                );
                delete fip;
                continue;
            }
            retval = link_file_info(project, fip);
            // for anonymous-platform projects, a duplicate file info
            // is dropped without a message
            //
            if (project->anonymous_platform && retval == ERR_NOT_UNIQUE) {
                delete fip;
                continue;
            }
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't handle file info %s in state file",
                    fip->name
                );
                delete fip;
                continue;
            }
            file_infos.push_back(fip);
#ifndef SIM
            // If the file had a failure before,
            // don't start another file transfer
            //
            int failnum;
            if (fip->had_failure(failnum)) {
                if (fip->pers_file_xfer) {
                    delete fip->pers_file_xfer;
                    fip->pers_file_xfer = NULL;
                }
            }
            // otherwise, set up the transfer described in the state file.
            // NOTE: insert() is attempted even if init() failed.
            //
            if (fip->pers_file_xfer) {
                retval = fip->pers_file_xfer->init(fip, fip->pers_file_xfer->is_upload);
                if (retval) {
                    msg_printf(project, MSG_INTERNAL_ERROR,
                        "Can't initialize file transfer for %s",
                        fip->name
                    );
                }
                retval = pers_file_xfers->insert(fip->pers_file_xfer);
                if (retval) {
                    msg_printf(project, MSG_INTERNAL_ERROR,
                        "Can't start persistent file transfer for %s",
                        fip->name
                    );
                }
            }
#endif
            continue;
        }
        if (xp.match_tag("app_version")) {
            APP_VERSION* avp = new APP_VERSION;
            retval = avp->parse(xp);
            if (!project) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Application version outside project in state file"
                );
                delete avp;
                continue;
            }
            // as with apps, app versions of anonymous-platform projects
            // are silently dropped
            //
            if (project->anonymous_platform) {
                delete avp;
                continue;
            }
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse application version in state file"
                );
                delete avp;
                continue;
            } 
            // no platform recorded: assume the primary platform
            //
            if (strlen(avp->platform) == 0) {
                safe_strcpy(avp->platform, get_primary_platform());
            } else {
                if (!is_supported_platform(avp->platform)) {
                    // if it's a platform we haven't heard of,
                    // must be that the user tried out a 64 bit client
                    // and then reverted to a 32-bit client.
                    // Let's not throw away the app version and its WUs
                    //
#ifndef SIM
                    msg_printf(project, MSG_INTERNAL_ERROR,
                        "App version has unsupported platform %s; changing to %s",
                        avp->platform, get_primary_platform()
                    );
#endif
                    safe_strcpy(avp->platform, get_primary_platform());
                }
            }
            // a missing coprocessor is reported but the app version is kept
            //
            if (avp->missing_coproc) {
                msg_printf(project, MSG_INFO,
                    "Application uses missing %s GPU",
                    avp->missing_coproc_name
                );
            }
            retval = link_app_version(project, avp);
            if (retval) {
                delete avp;
                continue;
            }
            app_versions.push_back(avp);
            continue;
        }
        if (xp.match_tag("workunit")) {
            WORKUNIT* wup = new WORKUNIT;
            retval = wup->parse(xp);
            if (!project) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Workunit outside project in state file"
                );
                delete wup;
                continue;
            }
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse workunit in state file"
                );
                delete wup;
                continue;
            }
            retval = link_workunit(project, wup);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't handle workunit in state file"
                );
                delete wup;
                continue;
            }
            workunits.push_back(wup);
            continue;
        }
        if (xp.match_tag("result")) {
            RESULT* rp = new RESULT;
            retval = rp->parse_state(xp);
            if (!project) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Task %s outside project in state file",
                    rp->name
                );
                delete rp;
                continue;
            }
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse task in state file"
                );
                delete rp;
                continue;
            }
            retval = link_result(project, rp);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't link task %s in state file",
                    rp->name
                );
                delete rp;
                continue;
            }
            // handle transition from old clients which didn't store result.platform;
            // skip for anon platform
            if (!project->anonymous_platform) {
                if (!strlen(rp->platform) || !is_supported_platform(rp->platform)) {
                    safe_strcpy(rp->platform, get_primary_platform());
                    rp->version_num = latest_version(rp->wup->app, rp->platform);
                }
            }
            // find the app version this task uses;
            // discard the task if there's none
            //
            rp->avp = lookup_app_version(
                rp->wup->app, rp->platform, rp->version_num, rp->plan_class
            );
            if (!rp->avp) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "No application found for task: %s %d %s; discarding",
                    rp->platform, rp->version_num, rp->plan_class
                );
                delete rp;
                continue;
            }
            // the task is kept, but flagged, if its coprocessor is missing
            //
            if (rp->avp->missing_coproc) {
                msg_printf(project, MSG_INFO,
                    "Missing coprocessor for task %s", rp->name
                );
                rp->coproc_missing = true;
            }
            rp->wup->version_num = rp->version_num;
            results.push_back(rp);
            continue;
        }
        if (xp.match_tag("project_files")) {
            if (!project) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Project files outside project in state file"
                );
                xp.skip_unexpected();
                continue;
            }
            // NOTE: the return value of parse_project_files() is not checked
            //
            parse_project_files(xp, project->project_files);
            project->link_project_files();
            continue;
        }
        if (xp.match_tag("host_info")) {
#ifdef SIM
            // simulator: also take the coprocessor description
            // from the state file
            //
            retval = host_info.parse(xp, false);
            coprocs = host_info.coprocs;
            coprocs.bound_counts();
#else
            retval = host_info.parse(xp, true);
#endif
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse host info in state file"
                );
            }
            continue;
        }
        if (xp.match_tag("time_stats")) {
            retval = time_stats.parse(xp);
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse time stats in state file"
                );
            }
            continue;
        }
        if (xp.match_tag("net_stats")) {
            retval = net_stats.parse(xp);
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse network stats in state file"
                );
            }
            continue;
        }
        if (xp.match_tag("active_task_set")) {
            retval = active_tasks.parse(xp);
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse active tasks in state file"
                );
            }
            continue;
        }
        if (xp.parse_string("platform_name", statefile_platform_name)) {
            continue;
        }
        // alt_platform is parsed (so it isn't reported as unrecognized)
        // but its value is not used in this function
        //
        if (xp.parse_string("alt_platform", stemp)) {
            continue;
        }
        // in the run-mode items below,
        // "retval" is reused as scratch storage for the parsed integer
        //
        if (xp.parse_int("user_run_request", retval)) {
            cpu_run_mode.set(retval, 0);
            continue;
        }
        if (xp.parse_int("user_run_prev_request", retval)) {
            cpu_run_mode.set_prev(retval);
            continue;
        }
        if (xp.parse_int("user_gpu_request", retval)) {
            gpu_run_mode.set(retval, 0);
            continue;
        }
        if (xp.parse_int("user_gpu_prev_request", retval)) {
            gpu_run_mode.set_prev(retval);
            continue;
        }
        if (xp.parse_int("user_network_request", retval)) {
            network_run_mode.set(retval, 0);
            continue;
        }
        // version of the client that wrote the state file
        //
        if (xp.parse_int("core_client_major_version", old_major_version)) {
            continue;
        }
        if (xp.parse_int("core_client_minor_version", old_minor_version)) {
            continue;
        }
        if (xp.parse_int("core_client_release", old_release)) {
            continue;
        }
        if (xp.parse_str("language", language, sizeof(language))) {
            continue;
        }
        if (xp.match_tag("proxy_info")) {
            retval = gui_proxy_info.parse(xp);
            if (retval) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Can't parse proxy info in state file"
                );
            }
            continue;
        }
        if (xp.parse_str("host_venue", main_host_venue, sizeof(main_host_venue))) {
            continue;
        }
        if (xp.parse_double("new_version_check_time", new_version_check_time)) {
            continue;
        }
        if (xp.parse_double("all_projects_list_check_time", all_projects_list_check_time)) {
            continue;
        }
        if (xp.parse_string("newer_version", newer_version)) {
            continue;
        }
#ifdef ENABLE_AUTO_UPDATE
        if (xp.match_tag("auto_update")) {
            if (!project) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "auto update outside project in state file"
                );
                xp.skip_unexpected();
                continue;
            }
            // mark the auto-update as present only if
            // both parse and validate/link succeed
            //
            if (!auto_update.parse(xp) && !auto_update.validate_and_link(project)) {
                auto_update.present = true;
            }
            continue;
        }
#endif
        // unrecognized element: optionally log it, then skip over it
        //
        if (log_flags.unparsed_xml) {
            msg_printf(0, MSG_INFO,
                "[unparsed_xml] state_file: unrecognized: %s",
                xp.parsed_tag
            );
        }
        xp.skip_unexpected();
    }
    sort_results();
    fclose(f);
    
    // if total resource share is zero, set all shares to 100
    //
    if (projects.size()) {
        unsigned int i;
        double x=0;
        for (i=0; i<projects.size(); i++) {
            x += projects[i]->resource_share;
        }
        if (!x) {
            msg_printf(NULL, MSG_INFO,
                "All projects have zero resource share; setting to 100"
            );
            for (i=0; i<projects.size(); i++) {
                projects[i]->resource_share = 100;
            }
        }
    }
    return 0;
}
Example #24
0
// Run one complete client simulation:
// - read the config file, log flags, client state file and global prefs
// - set up coprocessors, projects and work fetch
// - run simulate()
// - write the results files, the summary, and the REC graph.
//
// Input file names are formed from infile_prefix,
// output file names from outfile_prefix.
// Exits the process if the client state file is missing or can't be parsed.
//
void do_client_simulation() {
    char buf[256], buf2[256];
    int retval;
    FILE* f;

    // NOTE: all path names are built with snprintf() so that
    // an over-long prefix truncates the name rather than overflowing buf
    //
    snprintf(buf, sizeof(buf), "%s%s", infile_prefix, CONFIG_FILE);
    cc_config.defaults();
    read_config_file(true, buf);

    // log flags are optional; note that they're read from the output prefix
    //
    log_flags.init();
    snprintf(buf, sizeof(buf), "%s%s", outfile_prefix, "log_flags.xml");
    f = fopen(buf, "r");
    if (f) {
        MIOFILE mf;
        mf.init_file(f);
        XML_PARSER xp(&mf);
        xp.get_tag();   // skip open tag
        log_flags.parse(xp);
        fclose(f);
    }

    gstate.add_platform("client simulator");
    snprintf(buf, sizeof(buf), "%s%s", infile_prefix, STATE_FILE_NAME);
    if (!boinc_file_exists(buf)) {
        fprintf(stderr, "No client state file\n");
        exit(1);
    }
    retval = gstate.parse_state_file_aux(buf);
    if (retval) {
        fprintf(stderr, "state file parse error %d\n", retval);
        exit(1);
    }

    // if tasks have pending transfers, mark as completed
    //
    for (unsigned int i=0; i<gstate.results.size(); i++) {
        RESULT* rp = gstate.results[i];
        if (rp->state() < RESULT_FILES_DOWNLOADED) {
            rp->set_state(RESULT_FILES_DOWNLOADED, "init");
        } else if (rp->state() == RESULT_FILES_UPLOADING) {
            rp->set_state(RESULT_FILES_UPLOADED, "init");
        }
    }

    check_app_config(infile_prefix);
    show_app_config();
    cc_config.show();
    log_flags.show();

    snprintf(buf, sizeof(buf), "%s%s", infile_prefix, GLOBAL_PREFS_FILE_NAME);
    snprintf(buf2, sizeof(buf2), "%s%s", infile_prefix, GLOBAL_PREFS_OVERRIDE_FILE);
    gstate.read_global_prefs(buf, buf2);
    fprintf(index_file,
        "<h3>Output files</h3>\n"
        "<a href=%s>Summary</a>\n"
        "<br><a href=%s>Log file</a>\n",
        SUMMARY_FNAME, LOG_FNAME
    );

    // fill in GPU device nums and OpenCL flags
    //
    for (int i=0; i<coprocs.n_rsc; i++) {
        COPROC& cp = coprocs.coprocs[i];
        for (int j=0; j<cp.count; j++) {
            cp.device_nums[j] = j;
            if (cp.have_opencl) {
                cp.instance_has_opencl[j] = true;
            }
        }
    }
    set_no_rsc_config();
    process_gpu_exclusions();

    get_app_params();
    if (!include_empty_projects) {
        cull_projects();
    }
    fprintf(summary_file, "--------------------------\n");

    // number the remaining projects consecutively
    //
    int j=0;
    for (unsigned int i=0; i<gstate.projects.size(); i++) {
        gstate.projects[i]->index = j++;
    }

    clear_backoff();

    gstate.log_show_projects();
    gstate.set_ncpus();
    work_fetch.init();

    //set_initial_rec();

    rec_adjust_period = delta;

    gstate.request_work_fetch("init");
    simulate();

    sim_results.compute_figures_of_merit();

    // write the results files.
    // If one can't be created, report it and carry on
    // rather than passing a NULL FILE* to print()/fclose()
    //
    snprintf(buf, sizeof(buf), "%s%s", outfile_prefix, RESULTS_DAT_FNAME);
    f = fopen(buf, "w");
    if (f) {
        sim_results.print(f);
        fclose(f);
    } else {
        fprintf(stderr, "Can't open %s for writing\n", buf);
    }
    snprintf(buf, sizeof(buf), "%s%s", outfile_prefix, RESULTS_TXT_FNAME);
    f = fopen(buf, "w");
    if (f) {
        sim_results.print(f, true);
        fclose(f);
    } else {
        fprintf(stderr, "Can't open %s for writing\n", buf);
    }

    fprintf(summary_file,
        "Simulation done.\n"
        "-------------------------\n"
        "Figures of merit:\n"
    );

    sim_results.print(summary_file, true);

    double cpu_time;
    boinc_calling_thread_cpu_time(cpu_time);
    fprintf(summary_file,
        "-------------------------\n"
        "Simulator CPU time: %f secs\n"
        "-------------------------\n"
        "Peak FLOPS: CPU %.2fG GPU %.2fG\n",
        cpu_time,
        cpu_peak_flops()/1e9,
        gpu_peak_flops()/1e9
    );
    print_project_results(summary_file);

    fclose(rec_file);
    make_graph("REC", "rec", 0);
}
Example #25
0
// Simulate trying to do a scheduler RPC to project p.
//
// First sample the project's availability; if it's down,
// back off and return false.
// Otherwise report completed results, then keep generating jobs
// for the apps that need work (unless existing_jobs_only is set),
// discarding any job that fails the deadline check,
// and finally pass the new jobs to WORK_FETCH::handle_reply().
//
// Returns true if we actually did an RPC.
//
bool CLIENT_STATE::simulate_rpc(PROJECT* p) {
    char buf[256], buf2[256];
    vector<IP_RESULT> ip_results;
    vector<RESULT*> new_results;

    // the availability sample covers the time since the last RPC attempt
    // (zero if this is the first one)
    //
    bool avail;
    if (p->last_rpc_time) {
        double delta = now - p->last_rpc_time;
        avail = p->available.sample(delta);
    } else {
        avail = p->available.sample(0);
    }
    p->last_rpc_time = now;
    if (!avail) {
        snprintf(buf, sizeof(buf),
            "RPC to %s skipped - project down<br>", p->project_name
        );
        html_msg += buf;
        msg_printf(p, MSG_INFO, "RPC skipped: project down");
        gstate.scheduler_op->project_rpc_backoff(p, "project down");
        p->master_url_fetch_pending = false;
        return false;
    }

    // save request params for WORK_FETCH::handle_reply
    //
    double save_cpu_req_secs = rsc_work_fetch[0].req_secs;
    for (int i=1; i<coprocs.n_rsc; i++) {
        COPROC& cp = coprocs.coprocs[i];
        if (!strcmp(cp.type, "NVIDIA")) {
            coprocs.nvidia.req_secs = rsc_work_fetch[i].req_secs;
        }
        if (!strcmp(cp.type, "ATI")) {
            coprocs.ati.req_secs = rsc_work_fetch[i].req_secs;
        }
        if (!strcmp(cp.type, "intel_gpu")) {
            coprocs.intel_gpu.req_secs = rsc_work_fetch[i].req_secs;
        }
    }

    // if the server doesn't do a workload simulation,
    // the deadline check below uses per-resource estimated delays
    //
    if (!server_uses_workload) {
        for (int i=0; i<coprocs.n_rsc; i++) {
            rsc_work_fetch[i].estimated_delay = rsc_work_fetch[i].busy_time_estimator.get_busy_time();
        }
    }

    // dont_use is set below on app versions whose jobs miss their deadline
    //
    for (unsigned int i=0; i<app_versions.size(); i++) {
        app_versions[i]->dont_use = false;
    }

    // buf2 can by itself be as long as buf,
    // so the message must be length-limited
    //
    work_fetch.request_string(buf2, sizeof(buf2));
    snprintf(buf, sizeof(buf), "RPC to %s: %s<br>", p->project_name, buf2);
    html_msg += buf;

    msg_printf(p, MSG_INFO, "RPC: %s", buf2);

    handle_completed_results(p);

    if (server_uses_workload) {
        get_workload(ip_results);
    }

    // generate jobs until no app needs more work
    //
    bool sent_something = false;
    while (!existing_jobs_only) {
        vector<APP*> apps;
        get_apps_needing_work(p, apps);
        if (apps.empty()) break;
        RESULT* rp = new RESULT;
        WORKUNIT* wup = new WORKUNIT;
        make_job(p, wup, rp, apps);

        // estimated runtime of the job on its app version
        //
        double et = wup->rsc_fpops_est / rp->avp->flops;
        if (server_uses_workload) {
            IP_RESULT c(rp->name, rp->report_deadline-now, et);
            if (check_candidate(c, ncpus, ip_results)) {
                ip_results.push_back(c);
            } else {
                msg_printf(p, MSG_INFO, "job for %s misses deadline sim\n", rp->app->name);
                // save avp before deleting the result that points to it
                //
                APP_VERSION* avp = rp->avp;
                delete rp;
                delete wup;
                avp->dont_use = true;
                continue;
            }
        } else {
            double est_delay = get_estimated_delay(rp);
            if (est_delay + et > wup->app->latency_bound) {
                msg_printf(p, MSG_INFO,
                    "job for %s misses deadline approx: del %f + et %f > %f\n",
                    rp->app->name,
                    est_delay, et, wup->app->latency_bound
                );
                APP_VERSION* avp = rp->avp;
                delete rp;
                delete wup;
                avp->dont_use = true;
                continue;
            }
        }

        // job accepted; there are no file transfers in the simulation,
        // so it starts out as downloaded
        //
        sent_something = true;
        rp->set_state(RESULT_FILES_DOWNLOADED, "simulate_rpc");
        results.push_back(rp);
        new_results.push_back(rp);
#if 0
        sprintf(buf, "got job %s: CPU time %.2f, deadline %s<br>",
            rp->name, rp->final_cpu_time, time_to_string(rp->report_deadline)
        );
        html_msg += buf;
#endif
        decrement_request(rp);
    }

    // size() returns size_t; cast it to match the %lu conversion
    //
    njobs += (int)new_results.size();
    msg_printf(0, MSG_INFO, "Got %lu tasks", (unsigned long)new_results.size());
    snprintf(buf, sizeof(buf), "got %lu tasks<br>", (unsigned long)new_results.size());
    html_msg += buf;

    // restore the CPU request saved above before handling the reply
    //
    SCHEDULER_REPLY sr;
    rsc_work_fetch[0].req_secs = save_cpu_req_secs;
    work_fetch.handle_reply(p, &sr, new_results);
    p->nrpc_failures = 0;
    p->sched_rpc_pending = 0;
    //p->min_rpc_time = now + 900;
    p->min_rpc_time = now;
    if (sent_something) {
        request_schedule_cpus("simulate_rpc");
        request_work_fetch("simulate_rpc");
    }
    sim_results.nrpcs++;
    return true;
}
Example #26
0
// Parse a scheduler request (line-oriented XML) from "fin"
// and fill in the members of this SCHEDULER_REQUEST.
//
// Returns NULL on success, i.e. when the closing </scheduler_request>
// tag is reached; otherwise returns a constant error message:
//   "no start tag"                  - first line missing or not <scheduler_request>
//   "unterminated unrecognized XML" - skip_unrecognized() reported an error
//   "no end tag"                    - input ended before </scheduler_request>
//
// At most MAX_RESULTS_PER_REQUEST <result> elements are stored per call;
// any further ones are parsed (to consume their input) and discarded.
//
const char* SCHEDULER_REQUEST::parse(FILE* fin) {
    // Cap on the number of <result> elements kept from one request.
    const int MAX_RESULTS_PER_REQUEST = 200;

    char buf[256];
    RESULT result;
    int retval;
    // Number of <result> elements seen during this call.
    // This is deliberately a per-call counter: a function-local static
    // would never be reset, so a process that parses more than one
    // request would drop all results once the cap had been reached.
    int nresults_seen = 0;

    // reset everything to defaults before parsing
    //
    strcpy(authenticator, "");
    strcpy(platform.name, "");
    strcpy(cross_project_id, "");
    hostid = 0;
    core_client_major_version = 0;
    core_client_minor_version = 0;
    core_client_release = 0;
    rpc_seqno = 0;
    work_req_seconds = 0;
    cpu_req_secs = 0;
    cpu_req_instances = 0;
    resource_share_fraction = 1.0;
    rrs_fraction = 1.0;
    prrs_fraction = 1.0;
    cpu_estimated_delay = 0;
    strcpy(global_prefs_xml, "");
    strcpy(working_global_prefs_xml, "");
    strcpy(code_sign_key, "");
    memset(&global_prefs, 0, sizeof(global_prefs));
    memset(&host, 0, sizeof(host));
    have_other_results_list = false;
    have_ip_results_list = false;
    have_time_stats_log = false;
    client_cap_plan_class = false;
    sandbox = -1;
    coproc_cuda = 0;
    coproc_ati = 0;

    // If the input is empty or unreadable, fgets() leaves buf
    // unset, so don't look at it in that case.
    //
    if (!fgets(buf, sizeof(buf), fin)) return "no start tag";
    if (!match_tag(buf, "<scheduler_request>")) return "no start tag";
    while (fgets(buf, sizeof(buf), fin)) {
        // If a line is too long, ignore it.
        // This can happen e.g. if the client has bad global_prefs.xml
        // This won't be necessary if we rewrite this using XML_PARSER
        //
        if (!strchr(buf, '\n')) {
            while (fgets(buf, sizeof(buf), fin)) {
                if (strchr(buf, '\n')) break;
            }
            continue;
        }

        if (match_tag(buf, "</scheduler_request>")) {
            // end of request: combine the version components into one number
            //
            core_client_version = 10000*core_client_major_version + 100*core_client_minor_version + core_client_release;
            return NULL;
        }
        if (parse_str(buf, "<authenticator>", authenticator, sizeof(authenticator))) {
            remove_quotes(authenticator);
            continue;
        }
        if (parse_str(buf, "<cross_project_id>", cross_project_id, sizeof(cross_project_id))) continue;
        if (parse_int(buf, "<hostid>", hostid)) continue;
        if (parse_int(buf, "<rpc_seqno>", rpc_seqno)) continue;
        if (parse_str(buf, "<platform_name>", platform.name, sizeof(platform.name))) continue;
        if (match_tag(buf, "<alt_platform>")) {
            CLIENT_PLATFORM cp;
            retval = cp.parse(fin);
            if (!retval) {
                alt_platforms.push_back(cp);
            }
            continue;
        }
        if (match_tag(buf, "<app_versions>")) {
            while (fgets(buf, sizeof(buf), fin)) {
                if (match_tag(buf, "</app_versions>")) break;
                if (match_tag(buf, "<app_version>")) {
                    CLIENT_APP_VERSION cav;
                    retval = cav.parse(fin);
                    if (retval) {
                        g_reply->insert_message(
                            "Invalid app version description", "high"
                        );
                    } else {
                        client_app_versions.push_back(cav);
                    }
                }
            }
            continue;
        }
        if (parse_int(buf, "<core_client_major_version>", core_client_major_version)) continue;
        if (parse_int(buf, "<core_client_minor_version>", core_client_minor_version)) continue;
        if (parse_int(buf, "<core_client_release>", core_client_release)) continue;
        if (parse_double(buf, "<work_req_seconds>", work_req_seconds)) continue;
        if (parse_double(buf, "<cpu_req_secs>", cpu_req_secs)) continue;
        if (parse_double(buf, "<cpu_req_instances>", cpu_req_instances)) continue;
        if (parse_double(buf, "<resource_share_fraction>", resource_share_fraction)) continue;
        if (parse_double(buf, "<rrs_fraction>", rrs_fraction)) continue;
        if (parse_double(buf, "<prrs_fraction>", prrs_fraction)) continue;
        if (parse_double(buf, "<estimated_delay>", cpu_estimated_delay)) continue;
        if (parse_double(buf, "<duration_correction_factor>", host.duration_correction_factor)) continue;
        if (match_tag(buf, "<global_preferences>")) {
            // copy the element verbatim, re-adding the start and end tags
            //
            strcpy(global_prefs_xml, "<global_preferences>\n");
            while (fgets(buf, sizeof(buf), fin)) {
                if (strstr(buf, "</global_preferences>")) break;
                safe_strcat(global_prefs_xml, buf);
            }
            safe_strcat(global_prefs_xml, "</global_preferences>\n");
            continue;
        }
        if (match_tag(buf, "<working_global_preferences>")) {
            // copy the element contents only (no enclosing tags)
            //
            while (fgets(buf, sizeof(buf), fin)) {
                if (strstr(buf, "</working_global_preferences>")) break;
                safe_strcat(working_global_prefs_xml, buf);
            }
            continue;
        }
        if (parse_str(buf, "<global_prefs_source_email_hash>", global_prefs_source_email_hash, sizeof(global_prefs_source_email_hash))) continue;
        if (match_tag(buf, "<host_info>")) {
            host.parse(fin);
            continue;
        }
        if (match_tag(buf, "<time_stats>")) {
            host.parse_time_stats(fin);
            continue;
        }
        if (match_tag(buf, "<time_stats_log>")) {
            handle_time_stats_log(fin);
            have_time_stats_log = true;
            continue;
        }
        if (match_tag(buf, "<net_stats>")) {
            host.parse_net_stats(fin);
            continue;
        }
        if (match_tag(buf, "<disk_usage>")) {
            host.parse_disk_usage(fin);
            continue;
        }
        if (match_tag(buf, "<result>")) {
            // Always parse, so the element's lines are consumed from fin;
            // keep only the first MAX_RESULTS_PER_REQUEST of this request.
            //
            result.parse_from_client(fin);
            if (nresults_seen < MAX_RESULTS_PER_REQUEST) {
                results.push_back(result);
            }
            nresults_seen++;
            continue;
        }
        if (match_tag(buf, "<code_sign_key>")) {
            copy_element_contents(fin, "</code_sign_key>", code_sign_key, sizeof(code_sign_key));
            continue;
        }
        if (match_tag(buf, "<msg_from_host>")) {
            MSG_FROM_HOST_DESC md;
            retval = md.parse(fin);
            if (!retval) {
                msgs_from_host.push_back(md);
            }
            continue;
        }
        if (match_tag(buf, "<file_info>")) {
            FILE_INFO fi;
            retval = fi.parse(fin);
            if (!retval) {
                file_infos.push_back(fi);
            }
            continue;
        }
        if (match_tag(buf, "<host_venue>")) {
            continue;
        }
        if (match_tag(buf, "<other_results>")) {
            have_other_results_list = true;
            while (fgets(buf, sizeof(buf), fin)) {
                if (match_tag(buf, "</other_results>")) break;
                if (match_tag(buf, "<other_result>")) {
                    OTHER_RESULT o_r;
                    retval = o_r.parse(fin);
                    if (!retval) {
                        other_results.push_back(o_r);
                    }
                }
            }
            continue;
        }
        if (match_tag(buf, "<in_progress_results>")) {
            have_ip_results_list = true;
            int i = 0;
            double now = time(0);
            while (fgets(buf, sizeof(buf), fin)) {
                if (match_tag(buf, "</in_progress_results>")) break;
                if (match_tag(buf, "<ip_result>")) {
                    IP_RESULT ir;
                    retval = ir.parse(fin);
                    if (!retval) {
                        // give unnamed entries a synthetic name
                        //
                        if (!strlen(ir.name)) {
                            sprintf(ir.name, "ip%d", i++);
                        }
                        // store the deadline relative to the current time
                        //
                        ir.report_deadline -= now;
                        ip_results.push_back(ir);
                    }
                }
            }
            continue;
        }
        if (match_tag(buf, "coprocs")) {
            MIOFILE mf;
            mf.init_file(fin);
            coprocs.parse(mf);
            coproc_cuda = (COPROC_CUDA*)coprocs.lookup("CUDA");
            coproc_ati = (COPROC_ATI*)coprocs.lookup("ATI");
            continue;
        }
        if (parse_bool(buf, "client_cap_plan_class", client_cap_plan_class)) continue;
        if (parse_int(buf, "<sandbox>", sandbox)) continue;

        // tags that are recognized but whose contents are not used here
        //
        if (match_tag(buf, "<active_task_set>")) continue;
        if (match_tag(buf, "<app>")) continue;
        if (match_tag(buf, "<app_version>")) continue;
        if (match_tag(buf, "<duration_variability>")) continue;
        if (match_tag(buf, "<new_version_check_time>")) continue;
        if (match_tag(buf, "<newer_version>")) continue;
        if (match_tag(buf, "<project>")) continue;
        if (match_tag(buf, "<project_files>")) continue;
        if (match_tag(buf, "<proxy_info>")) continue;
        if (match_tag(buf, "<user_network_request>")) continue;
        if (match_tag(buf, "<user_run_request>")) continue;
        if (match_tag(buf, "<master_url>")) continue;
        if (match_tag(buf, "<project_name>")) continue;
        if (match_tag(buf, "<user_name>")) continue;
        if (match_tag(buf, "<team_name>")) continue;
        if (match_tag(buf, "<email_hash>")) continue;
        if (match_tag(buf, "<user_total_credit>")) continue;
        if (match_tag(buf, "<user_expavg_credit>")) continue;
        if (match_tag(buf, "<user_create_time>")) continue;
        if (match_tag(buf, "<host_total_credit>")) continue;
        if (match_tag(buf, "<host_expavg_credit>")) continue;
        if (match_tag(buf, "<host_create_time>")) continue;
        if (match_tag(buf, "<nrpc_failures>")) continue;
        if (match_tag(buf, "<master_fetch_failures>")) continue;
        if (match_tag(buf, "<min_rpc_time>")) continue;
        if (match_tag(buf, "<short_term_debt>")) continue;
        if (match_tag(buf, "<long_term_debt>")) continue;
        if (match_tag(buf, "<resource_share>")) continue;
        if (match_tag(buf, "<scheduler_url>")) continue;
        if (match_tag(buf, "</project>")) continue;
        if (match_tag(buf, "<?xml")) continue;
        strip_whitespace(buf);
        if (!strlen(buf)) continue;

        // anything else: log it and skip it
        //
        log_messages.printf(MSG_NORMAL,
            "SCHEDULER_REQUEST::parse(): unrecognized: %s\n", buf
        );
        MIOFILE mf;
        mf.init_file(fin);
        retval = skip_unrecognized(buf, mf);
        if (retval) return "unterminated unrecognized XML";
    }
    return "no end tag";
}