int main(int argc, char *argv[]) { char diagnosis[DRMAA_ERROR_STRING_BUFFER]; const char *all_jobids[NBULKS*JOB_CHUNK + JOB_CHUNK+1]; char jobid[100]; int drmaa_errno, i, pos = 0; const char *job_path; drmaa_job_template_t *jt; if (argc<2) { fprintf(stderr, "usage: example <path-to-job>\n"); return 1; } job_path = argv[1]; if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis); return 1; } /* * submit some bulk jobs */ if (!(jt = create_job_template(job_path, 5, 1))) { fprintf(stderr, "create_job_template() failed\n"); return 1; } for (i=0; i<NBULKS; i++) { drmaa_job_ids_t *jobids; int j; while ((drmaa_errno=drmaa_run_bulk_jobs(&jobids, jt, 1, JOB_CHUNK, 1, diagnosis, sizeof(diagnosis)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) { fprintf(stderr, "drmaa_run_bulk_jobs() failed - retry: %s %s\n", diagnosis, drmaa_strerror(drmaa_errno)); sleep(1); } if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_bulk_jobs() failed: %s %s\n", diagnosis, drmaa_strerror(drmaa_errno)); return 1; } printf("submitted bulk job with jobids:\n"); for (j=0; j<JOB_CHUNK; j++) { drmaa_get_next_job_id(jobids, jobid, sizeof(jobid)-1); all_jobids[pos++] = strdup(jobid); printf("\t \"%s\"\n", jobid); } drmaa_release_job_ids(jobids); } drmaa_delete_job_template(jt, NULL, 0); /* * submit some sequential jobs */ if (!(jt = create_job_template(job_path, 5, 0))) { fprintf(stderr, "create_sleeper_job_template() failed\n"); return 1; } for (i=0; i<JOB_CHUNK; i++) { while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) { fprintf(stderr, "drmaa_run_job() failed - retry: %s\n", diagnosis); sleep(1); } if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis); return 1; } printf("\t \"%s\"\n", jobid); all_jobids[pos++] = strdup(jobid); } /* set string array end mark */ all_jobids[pos] = NULL; drmaa_delete_job_template(jt, NULL, 0); /* * synchronize with all jobs */ drmaa_errno = drmaa_synchronize(all_jobids, DRMAA_TIMEOUT_WAIT_FOREVER, 0, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_synchronize(DRMAA_JOB_IDS_SESSION_ALL, dispose) failed: %s\n", diagnosis); return 1; } printf("synchronized with all jobs\n"); /* * wait all those jobs */ for (pos=0; pos<NBULKS*JOB_CHUNK + JOB_CHUNK; pos++) { int stat; int aborted, exited, exit_status, signaled; drmaa_errno = drmaa_wait(all_jobids[pos], jobid, sizeof(jobid)-1, &stat, DRMAA_TIMEOUT_WAIT_FOREVER, NULL, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_wait(%s) failed: %s\n", all_jobids[pos], diagnosis); return 1; } /* * report how job finished */ drmaa_wifaborted(&aborted, stat, NULL, 0); if (aborted) printf("job \"%s\" never ran\n", all_jobids[pos]); else { drmaa_wifexited(&exited, stat, NULL, 0); if (exited) { drmaa_wexitstatus(&exit_status, stat, NULL, 0); printf("job \"%s\" finished regularly with exit status %d\n", all_jobids[pos], exit_status); } else { drmaa_wifsignaled(&signaled, stat, NULL, 0); if (signaled) { char termsig[DRMAA_SIGNAL_BUFFER+1]; drmaa_wtermsig(termsig, DRMAA_SIGNAL_BUFFER, stat, NULL, 0); printf("job \"%s\" finished due to signal %s\n", all_jobids[pos], termsig); } else printf("job \"%s\" finished with unclear conditions\n", all_jobids[pos]); } } } if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis); return 1; } return 0; }
int main(int argc, char **argv) { drmaa_job_template_t *jt_a = NULL; drmaa_job_template_t *jt_b = NULL; drmaa_job_ids_t *jobids_a = NULL; drmaa_job_ids_t *jobids_b = NULL; const char *base_name = "Job"; char name_a[512], name_b[512]; struct chunking_t { int a; int b; } chunking[] = { { 1, 1 }, { 1, 3 }, { 1, 4 }, { 1, 12 }, { 3, 1 }, { 3, 3 }, { 3, 4 }, { 3, 12 }, { 4, 1 }, { 4, 3 }, { 4, 4 }, { 4, 12 }, { 12, 1 }, { 12, 3 }, { 12, 4 }, { 12, 12 } }; int samples, i, drmaa_errno = 0; while (argc > 2) { if (!strcmp(argv[1], "-ps")) { fprintf(stderr, "use drmaa_job_ps() for progress monitoring\n"); do_ps = 1; argc--; argv++; } if (!strcmp(argv[1], "-1st")) { fprintf(stderr, "first sample only\n"); do_1st = 1; argc--; argv++; } if (!strcmp(argv[1], "-exit_unknown")) { fprintf(stderr, "first sample only\n"); do_exit = 0; argc--; argv++; } if (!strcmp(argv[1], "-nm")) { argc--; argv++; base_name = argv[1]; argc--; argv++; fprintf(stderr, "use \"%s\" as base name\n", base_name); } } if (argc != 2) { printf("Usage: %s path_to_sleeper_script\n", argv[0]); exit(1); } if (strstr(argv[1], "sleeper.sh") == NULL) { printf("Usage: %s path_to_sleeper_script\n", argv[0]); exit(1); } if (do_1st) samples = 1; else samples = sizeof(chunking)/sizeof(void *); while ((drmaa_errno=drmaa_init(NULL, errorbuf, sizeof(errorbuf)-1) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE)) sleep(1); if (drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_init failed: %s\n", errorbuf); exit(EXIT_FAILURE); } strcpy(name_a, base_name); strcat(name_a, "A"); strcpy(name_b, base_name); strcat(name_b, "B"); for (i=0; i< samples; i++) { jt_a = create_job_template(argv[1], name_a, NULL, chunking[i].a, 1); jt_b = create_job_template(argv[1], name_b, name_a, chunking[i].b, 0); while ((drmaa_errno = drmaa_run_bulk_jobs(&jobids_a, jt_a, 1, BULK_SIZE, chunking[i].a, errorbuf, sizeof(errorbuf)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) sleep(1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_bulk_jobs(%d-%d:%d) failed: %s\n", 1, BULK_SIZE, chunking[i].a, errorbuf); return 1; } while ((drmaa_errno = drmaa_run_bulk_jobs(&jobids_b, jt_b, 1, BULK_SIZE, chunking[i].b, errorbuf, sizeof(errorbuf)-1))==DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) sleep(1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_bulk_jobs(%d-%d:%d) failed: %s\n", 1, BULK_SIZE, chunking[i].b, errorbuf); return 1; } /* sleeper will sleep 60s by default, so this test should be okay */ validate_jobs(jobids_a, chunking[i].a, jobids_b, chunking[i].b); drmaa_release_job_ids(jobids_a); drmaa_release_job_ids(jobids_b); drmaa_errno = drmaa_delete_job_template(jt_a, errorbuf, sizeof(errorbuf)-1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_delete_job_template failed: %s\n", errorbuf); return 1; } drmaa_errno = drmaa_delete_job_template(jt_b, errorbuf, sizeof(errorbuf)-1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_delete_job_template failed: %s\n", errorbuf); return 1; } } while ((drmaa_errno = drmaa_exit(errorbuf, sizeof(errorbuf)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) sleep(1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_exit failed: %s\n", errorbuf); return 1; } printf ("OK\n"); return 0; }
int launch (const char* script, const char* arg_vec[], int num_tasks) { char error[DRMAA_ERROR_STRING_BUFFER]; int errnum = 0; drmaa_job_template_t *jt = NULL; errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); return 1; } errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not create job template: %s\n", error); } else { errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, script, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not set attribute \"%s\": %s\n", DRMAA_REMOTE_COMMAND, error); } else { errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, arg_vec, error, DRMAA_ERROR_STRING_BUFFER); } if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not set attribute \"%s\": %s\n", DRMAA_REMOTE_COMMAND, error); }else { drmaa_job_ids_t *ids = NULL; errnum = drmaa_run_bulk_jobs (&ids, jt, 1, num_tasks, 1, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not submit job: %s\n", error); }else { char jobid[DRMAA_JOBNAME_BUFFER]; const char *jobids[2] = {DRMAA_JOB_IDS_SESSION_ALL, NULL}; while (drmaa_get_next_job_id (ids, jobid, DRMAA_JOBNAME_BUFFER) == DRMAA_ERRNO_SUCCESS) { printf ("A job task has been submitted with id %s\n", jobid); } errnum = drmaa_synchronize (jobids, DRMAA_TIMEOUT_WAIT_FOREVER, 1, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not wait for jobs: %s\n", error); }else { printf ("All job tasks have finished.\n"); } } drmaa_release_job_ids (ids); } /* else */ errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not delete job template: %s\n", error); } } /* else */ errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); return 1; } return 0; }