/*
 * sbatch_set_first_avail_cluster - choose which of the clusters listed in
 *	opt.clusters the job should be submitted to and record the choice
 *	in the global working_cluster_rec.
 *
 * With no cluster list (or an empty one) nothing is touched.  With exactly
 * one entry, that entry becomes working_cluster_rec.  Otherwise every
 * listed cluster is tried through _job_will_run(), the successful results
 * are ordered by _sort_local_cluster(), and the cluster record attached to
 * the first sorted result is selected.
 *
 * IN/OUT req - job description passed to _job_will_run().  If alloc_node
 *	is NULL and the short hostname can be obtained, alloc_node points
 *	at a local buffer for the duration of the probes and is set back to
 *	NULL before returning.
 * RET SLURM_SUCCESS, or SLURM_ERROR when none of the clusters produced a
 *	result.
 */
extern int sbatch_set_first_avail_cluster(job_desc_msg_t *req)
{
	char hostname[64];
	bool borrowed_hostname = false;
	local_cluster_rec_t *candidate = NULL;
	List candidates = NULL;
	ListIterator cluster_itr;
	int rc = SLURM_SUCCESS;

	/* Zero or one cluster: there is nothing to choose between. */
	if (!opt.clusters || !list_count(opt.clusters))
		return rc;
	if (list_count(opt.clusters) == 1) {
		working_cluster_rec = list_peek(opt.clusters);
		return rc;
	}

	/*
	 * Lend the request a hostname while probing.  The buffer lives on
	 * this stack frame, so the pointer is withdrawn again below.
	 */
	if ((req->alloc_node == NULL) &&
	    (gethostname_short(hostname, sizeof(hostname)) == 0)) {
		req->alloc_node = hostname;
		borrowed_hostname = true;
	}

	candidates = list_create(_destroy_local_cluster_rec);
	cluster_itr = list_iterator_create(opt.clusters);
	for (;;) {
		/* _job_will_run() is called with working_cluster_rec set to
		 * the cluster being tried. */
		working_cluster_rec = list_next(cluster_itr);
		if (!working_cluster_rec)
			break;

		candidate = _job_will_run(req);
		if (!candidate) {
			error("Problem with submit to cluster %s: %m",
			      working_cluster_rec->name);
			continue;
		}
		list_append(candidates, candidate);
	}
	list_iterator_destroy(cluster_itr);

	if (borrowed_hostname)
		req->alloc_node = NULL;

	if (!list_count(candidates)) {
		error("Can't run on any of the clusters given");
		rc = SLURM_ERROR;
	} else {
		/* sort the list so the first spot is on top */
		local_cluster_name = slurm_get_cluster_name();
		list_sort(candidates, (ListCmpF)_sort_local_cluster);
		xfree(local_cluster_name);

		/* set up the working cluster and be done */
		candidate = list_peek(candidates);
		working_cluster_rec = candidate->cluster_rec;
	}

	list_destroy(candidates);
	return rc;
}
/*
 * main - command-line driver: stores its six operands in the file-level
 *	settings, echoes them, and then runs the helper sequence below.
 *
 * Usage: prog auth_key control_addr e_port job_id sched_port is_bluegene
 *
 * When built with _DEBUG set, only _single_msg() is run.  Otherwise the
 * full sequence of helper calls is executed against job_id; the
 * suspend/resume pair is skipped when is_bluegene is non-zero, and
 * _event_mgr() is run only when e_port is non-zero.
 *
 * Exits with status 1 (after printing the usage line) if fewer than six
 * operands are supplied, and with status 0 after printing "SUCCESS".
 */
int main(int argc, char * argv[])
{
	/*
	 * Six operands plus the program name are required.  argv[6] is
	 * read below, and argv[argc] is a null pointer, so anything less
	 * than argc == 7 would hand NULL to atoi().
	 */
	if (argc < 7) {
		printf("Usage: %s, auth_key control_addr e_port "
		       "job_id sched_port is_bluegene\n", argv[0]);
		exit(1);
	}

	auth_key     = argv[1];
	control_addr = argv[2];
	e_port       = atoi(argv[3]);
	job_id       = atoi(argv[4]);
	sched_port   = atoi(argv[5]);
	is_bluegene  = atoi(argv[6]);
	printf("auth_key=%s control_addr=%s e_port=%d job_id=%d sched_port=%d "
	       "is_bluegene=%d\n",
	       auth_key, control_addr, e_port, job_id, sched_port,
	       is_bluegene);

#if _DEBUG
	_single_msg();
#else
	_initialize();
	_get_jobs();
	_get_nodes();
	_job_will_run(job_id);
	_modify_job(job_id);
	_get_jobs();
	_start_job(job_id);
	_get_jobs();
	if (!is_bluegene) {
		_suspend_job(job_id);
		_resume_job(job_id);
	}
	_notify_job(job_id);
	_signal_job(job_id);
	if (e_port)
		_event_mgr();
	else {
		printf("READY\n");
		sleep(3);
	}
	_cancel_job(job_id+1);
	_job_requeue(job_id);	/* Put job back into HELD state */
	sleep(15);
	_start_job(job_id);
	_get_jobs();
#endif
	printf("SUCCESS\n");
	exit(0);
}