static int submit_by_project(const char *project) { drmaa_job_template_t *jt = NULL; char diagnosis[DRMAA_ERROR_STRING_BUFFER]; char jobid[100]; int drmaa_errno, i; for (i=1; i<=40; i++) { if (!(jt = create_job_template(job_path, project, i))) { fprintf(stderr, "create_sleeper_job_template() failed\n"); return 1; } while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) { fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis); sleep(1); } if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis); return 1; } if (!quiet) printf("\t \"%s\"\n", jobid); drmaa_delete_job_template(jt, NULL, 0); } return 0; }
/*
 * Test driver: submit one job directly after a heap block has been filled
 * with non-zero bytes and released again.
 *
 * Usage: <program> <job_path> [<job_category>]
 *
 * argv[1] is the job path and argv[2] (optional, NULL when absent) the job
 * category; both are handed to create_job_template().
 *
 * Returns 0 on success, 1 if argument checking, drmaa_init(), template
 * creation, the scratch allocation, drmaa_run_job() or drmaa_exit() fails.
 */
int main(int argc, char *argv[])
{
   char diagnosis[DRMAA_ERROR_STRING_BUFFER + 1];
   char jobid[DRMAA_JOBNAME_BUFFER + 1];
   int drmaa_errno = DRMAA_ERRNO_SUCCESS;
   const char *job_path = NULL;
   const char *job_cat = NULL;
   void *buffer = NULL;
   size_t buffer_size = 0;
   drmaa_job_template_t *jt = NULL;
   int exit_code = 0;

   /* argv[1] must exist; reading argv[2] is only defined when argc >= 2 */
   if (argc < 2) {
      fprintf(stderr, "usage: %s <job_path> [<job_category>]\n",
              (argc > 0 && argv[0] != NULL) ? argv[0] : "<program>");
      return 1;
   }

   job_path = argv[1];
   job_cat = (argc > 2) ? argv[2] : NULL;

   if (drmaa_init("", diagnosis, DRMAA_ERROR_STRING_BUFFER) != DRMAA_ERRNO_SUCCESS) {
      fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis);
      return 1;
   }

   /* Submit a job to find out what the current job id is. */
   jt = create_job_template(job_path, job_cat);

   if (jt == NULL) {
      fprintf(stderr, "create_job_template() failed\n");
      exit_code = 1;
      goto error;
   }

   /* Make sure the next available block of memory contains something other than
    * NULL. */
   buffer_size = 256 * sizeof(void*);
   buffer = malloc(buffer_size);

   if (buffer == NULL) {
      fprintf(stderr, "malloc() failed\n");
      exit_code = 1;
      goto error;
   }

   memset(buffer, 255, buffer_size);
   sge_free(&buffer);

   drmaa_errno = drmaa_run_job(jobid, DRMAA_JOBNAME_BUFFER, jt, diagnosis,
                               DRMAA_ERROR_STRING_BUFFER);

   if (drmaa_errno != DRMAA_ERRNO_SUCCESS) {
      fprintf(stderr, "drmaa_run_job() failed: %s %s\n", diagnosis,
              drmaa_strerror(drmaa_errno));
      exit_code = 1;
   }

error:
   /* shared cleanup for the success path and every failure path */
   if (jt != NULL) {
      drmaa_delete_job_template(jt, diagnosis, DRMAA_ERROR_STRING_BUFFER);
      jt = NULL;
   }

   if (drmaa_exit(diagnosis, DRMAA_ERROR_STRING_BUFFER) != DRMAA_ERRNO_SUCCESS) {
      fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis);
      exit_code = 1;
   }

   return exit_code;
}
int main(int argc, char **argv) { drmaa_job_template_t *jt_a = NULL; drmaa_job_template_t *jt_b = NULL; drmaa_job_ids_t *jobids_a = NULL; drmaa_job_ids_t *jobids_b = NULL; const char *base_name = "Job"; char name_a[512], name_b[512]; struct chunking_t { int a; int b; } chunking[] = { { 1, 1 }, { 1, 3 }, { 1, 4 }, { 1, 12 }, { 3, 1 }, { 3, 3 }, { 3, 4 }, { 3, 12 }, { 4, 1 }, { 4, 3 }, { 4, 4 }, { 4, 12 }, { 12, 1 }, { 12, 3 }, { 12, 4 }, { 12, 12 } }; int samples, i, drmaa_errno = 0; while (argc > 2) { if (!strcmp(argv[1], "-ps")) { fprintf(stderr, "use drmaa_job_ps() for progress monitoring\n"); do_ps = 1; argc--; argv++; } if (!strcmp(argv[1], "-1st")) { fprintf(stderr, "first sample only\n"); do_1st = 1; argc--; argv++; } if (!strcmp(argv[1], "-exit_unknown")) { fprintf(stderr, "first sample only\n"); do_exit = 0; argc--; argv++; } if (!strcmp(argv[1], "-nm")) { argc--; argv++; base_name = argv[1]; argc--; argv++; fprintf(stderr, "use \"%s\" as base name\n", base_name); } } if (argc != 2) { printf("Usage: %s path_to_sleeper_script\n", argv[0]); exit(1); } if (strstr(argv[1], "sleeper.sh") == NULL) { printf("Usage: %s path_to_sleeper_script\n", argv[0]); exit(1); } if (do_1st) samples = 1; else samples = sizeof(chunking)/sizeof(void *); while ((drmaa_errno=drmaa_init(NULL, errorbuf, sizeof(errorbuf)-1) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE)) sleep(1); if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_init failed: %s\n", errorbuf); exit(EXIT_FAILURE); } strcpy(name_a, base_name); strcat(name_a, "A"); strcpy(name_b, base_name); strcat(name_b, "B"); for (i=0; i< samples; i++) { jt_a = create_job_template(argv[1], name_a, NULL, chunking[i].a, 1); jt_b = create_job_template(argv[1], name_b, name_a, chunking[i].b, 0); while ((drmaa_errno = drmaa_run_bulk_jobs(&jobids_a, jt_a, 1, BULK_SIZE, chunking[i].a, errorbuf, sizeof(errorbuf)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) sleep(1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, 
"drmaa_run_bulk_jobs(%d-%d:%d) failed: %s\n", 1, BULK_SIZE, chunking[i].a, errorbuf); return 1; } while ((drmaa_errno = drmaa_run_bulk_jobs(&jobids_b, jt_b, 1, BULK_SIZE, chunking[i].b, errorbuf, sizeof(errorbuf)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) sleep(1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_bulk_jobs(%d-%d:%d) failed: %s\n", 1, BULK_SIZE, chunking[i].b, errorbuf); return 1; } /* sleeper will sleep 60s by default, so this test should be okay */ validate_jobs(jobids_a, chunking[i].a, jobids_b, chunking[i].b); drmaa_release_job_ids(jobids_a); drmaa_release_job_ids(jobids_b); drmaa_errno = drmaa_delete_job_template(jt_a, errorbuf, sizeof(errorbuf)-1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_delete_job_template failed: %s\n", errorbuf); return 1; } drmaa_errno = drmaa_delete_job_template(jt_b, errorbuf, sizeof(errorbuf)-1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_delete_job_template failed: %s\n", errorbuf); return 1; } } while ((drmaa_errno = drmaa_exit(errorbuf, sizeof(errorbuf)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) sleep(1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_exit failed: %s\n", errorbuf); return 1; } printf ("OK\n"); return 0; }
int main(int argc, char *argv[]) { char diagnosis[DRMAA_ERROR_STRING_BUFFER]; const char *all_jobids[NBULKS*JOB_CHUNK + JOB_CHUNK+1]; char jobid[100]; int drmaa_errno, i, pos = 0; const char *job_path; drmaa_job_template_t *jt; if (argc<2) { fprintf(stderr, "usage: example <path-to-job>\n"); return 1; } job_path = argv[1]; if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis); return 1; } /* * submit some bulk jobs */ if (!(jt = create_job_template(job_path, 5, 1))) { fprintf(stderr, "create_job_template() failed\n"); return 1; } for (i=0; i<NBULKS; i++) { drmaa_job_ids_t *jobids; int j; while ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis, sizeof(diagnosis)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) { fprintf(stderr, "drmaa_run_bulk_jobs() failed - retry: %s %s\n", diagnosis, drmaa_strerror(drmaa_errno)); sleep(1); } if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s %s\n", diagnosis, drmaa_strerror(drmaa_errno)); return 1; } printf("submitted bulk job with jobids:\n"); for (j=0; j<JOB_CHUNK; j++) { drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1); all_jobids[pos++] = strdup(jobid); printf("\t \"%s\"\n", jobid); } drmaa_release_job_ids(jobids); } drmaa_delete_job_template(jt, NULL, 0); /* * submit some sequential jobs */ if (!(jt = create_job_template(job_path, 5, 0))) { fprintf(stderr, "create_sleeper_job_template() failed\n"); return 1; } for (i=0; i<JOB_CHUNK; i++) { while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) { fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis); sleep(1); } if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis); return 1; } printf("\t \"%s\"\n", jobid); all_jobids[pos++] = strdup(jobid); } /* set string array end mark */ all_jobids[pos] = 
NULL; drmaa_delete_job_template(jt, NULL, 0); /* * synchronize with all jobs */ drmaa_errno = drmaa_synchronize(all_jobids, DRMAA_TIMEOUT_WAIT_FOREVER, 0, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis); return 1; } printf("synchronized with all jobs\n"); /* * wait all those jobs */ for (pos=0; pos<NBULKS*JOB_CHUNK + JOB_CHUNK; pos++) { int stat; int aborted, exited, exit_status, signaled; drmaa_errno = drmaa_wait(all_jobids[pos], jobid, sizeof(jobid)-1, &stat, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_wait(%s) failed: %s\n", all_jobids[pos], diagnosis); return 1; } /* * report how job finished */ drmaa_wifaborted(&aborted, stat, NULL, 0); if (aborted) printf("job \"%s\" never ran\n", all_jobids[pos]); else { drmaa_wifexited(&exited, stat, NULL, 0); if (exited) { drmaa_wexitstatus(&exit_status, stat, NULL, 0); printf("job \"%s\" finished regularly with exit status %d\n", all_jobids[pos], exit_status); } else { drmaa_wifsignaled(&signaled, stat, NULL, 0); if (signaled) { char termsig[DRMAA_SIGNAL_BUFFER+1]; drmaa_wtermsig(termsig, DRMAA_SIGNAL_BUFFER, stat, NULL, 0); printf("job \"%s\" finished due to signal %s\n", all_jobids[pos], termsig); } else printf("job \"%s\" finished with unclear conditions\n", all_jobids[pos]); } } } if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis); return 1; } return 0; }
int main(int argc, char *argv[]) { char diagnosis[DRMAA_ERROR_STRING_BUFFER]; char *s, jobid[100]; int drmaa_errno, i; int ret = 0; struct timeval start_s, finish_s, wait_s; if (argc<2) { usage(); return 1; } i = 1; do { if (!strcmp("-help", argv[i]) || !strcmp("-h", argv[i])) { usage(); return 0; } else if (!strcmp("-jobs", argv[i])) { i++; if (argc < i+1) { usage(); return 1; } njobs = atoi(argv[i]); i++; } else if (!strcmp("-native", argv[i])) { i++; if (argc < i+1) { usage(); return 1; } native_spec = argv[i]; i++; } else if (!strcmp("-threads", argv[i])) { if (argc < i+1) { usage(); return 1; } i++; nthreads = atoi(argv[i]); i++; } else if (!strcmp("-quiet", argv[i])) { i++; if (argc < i+1) { usage(); return 1; } if (!strcmp("yes", argv[i]) || !strcmp("y", argv[i])) quiet = 1; else if (!strcmp("no", argv[i]) || !strcmp("n", argv[i])) quiet = 0; else { usage(); return 1; } i++; } else if (!strcmp("-wait", argv[i])) { i++; if (argc < i+1) { usage(); return 1; } if (!strcmp("yes", argv[i]) || !strcmp("y", argv[i])) dowait = 1; else if (!strcmp("no", argv[i]) || !strcmp("n", argv[i])) dowait = 0; else { usage(); return 1; } i++; } else if (!strcmp("-scenario", argv[i])) { i++; if (argc < i+1) { usage(); return 1; } s = strchr(argv[i], '.'); *s = '\0'; if (strcmp("queue", argv[i]) && strcmp("type", argv[i]) && strcmp("number", argv[i]) && strcmp("pe", argv[i])) { usage(); return 1; } scenario = strdup(argv[i]); s++; if (strcmp("hostgroup", s) && strcmp("resource", s) && strcmp("none", s) && strcmp("softresource", s) && strcmp("softhostgroup", s)) { usage(); return 1; } site_b = strdup(s); i++; } else { job_path = argv[i]; i++; if (job_path[0]=='-') { usage(); return 1; } if (argv[i]) { job_args = &argv[i]; } } } while (i < argc && !job_path); if (!job_path) { usage(); return 1; } #if 0 printf("job_path: \"%s\"\n", job_path); printf("njobs: %d\n", njobs); printf("nthreads: %d\n", nthreads); printf("native: %s\n", native_spec); printf("dowait: %s\n", 
dowait?"yes":"no"); printf("quiet: %s\n", quiet?"yes":"no"); printf("scenario: %s\n", scenario?scenario:"<no such>"); printf("site_b: %s\n", site_b?site_b:"<no such>"); printf("1st arg: %s\n", job_args?job_args[0]:"<noargs>"); #endif if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis); return 1; } get_gmt(&start_s); if (!scenario) { if (!getuid()) { fprintf(stderr, "switching to ah114088:gridware\n"); setegid(339); seteuid(115088); } if (!(jt = create_job_template(job_path, NULL, 0))) { fprintf(stderr, "create_sleeper_job_template() failed\n"); return 1; } if (nthreads==1) { if (submit_jobs(&argv[i])) return 1; } else { pthread_t *ids = NULL; ids = (pthread_t *)malloc(sizeof (pthread_t) * nthreads); for (i = 0; i < nthreads; i++) { if (pthread_create(&ids[i], NULL, submit_jobs, NULL)) { fprintf(stderr, "pthread_create() failed: %s\n", strerror(errno)); free(ids); return 1; } } for (i = 0; i < nthreads; i++) { pthread_join(ids[i], NULL); } } drmaa_delete_job_template(jt, NULL, 0); if (!getuid()) { fprintf(stderr, "switching to root:root\n"); seteuid(0); setegid(0); } } else { if (submit_by_project("project1") || submit_by_project("project2") || submit_by_project("project3") || submit_by_project("project4")) return 1; } get_gmt(&finish_s); printf("submission took %8.3f seconds\n", DELTA_SECONDS(start_s, finish_s)); if (dowait) { int success = 1; for (i=0; i<njobs * nthreads; i++) { int stat; int aborted, exited, exit_status, signaled; drmaa_errno = drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY, jobid, sizeof(jobid)-1, &stat, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_wait() failed: %s\n", diagnosis); return 1; } /* * report how job finished */ drmaa_wifaborted(&aborted, stat, NULL, 0); if (aborted) { printf("job \"%s\" never ran\n", jobid); success = 0; } else { drmaa_wifexited(&exited, stat, NULL, 0); if 
(exited) { drmaa_wexitstatus(&exit_status, stat, NULL, 0); if (exit_status != 0) { success = 0; printf("job \"%s\" with exit status %d\n", jobid, exit_status); } else { if (!quiet) printf("job \"%s\" finished regularly\n", jobid); } } else { success = 0; drmaa_wifsignaled(&signaled, stat, NULL, 0); if (signaled) { char termsig[DRMAA_SIGNAL_BUFFER+1]; drmaa_wtermsig(termsig, DRMAA_SIGNAL_BUFFER, stat, NULL, 0); printf("job \"%s\" finished due to signal %s\n", jobid, termsig); } else printf("job \"%s\" finished with unclear conditions\n", jobid); } } } if (!success) ret = 1; get_gmt(&wait_s); printf("wait took %8.3f seconds\n", DELTA_SECONDS(finish_s, wait_s)); printf("jobs took %8.3f seconds\n", DELTA_SECONDS(start_s, wait_s)); } if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis); return 1; } return ret; }