static drmaa_job_template_t *create_job_template(const char *job_path, const char *job_cat) { drmaa_job_template_t *jt = NULL; if (drmaa_allocate_job_template(&jt, NULL, 0) != DRMAA_ERRNO_SUCCESS) return NULL; /* the job to be run */ drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, job_path, NULL, 0); /* join output/error file */ drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0); /* path for output */ drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":/dev/null", NULL, 0); /* job category */ drmaa_set_attribute(jt, DRMAA_JOB_CATEGORY, job_cat, NULL, 0); /* job native specification */ drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, "-ac test=\"one two three\"", NULL, 0); return jt; }
static drmaa_job_template_t *create_job_template(const char *job_path, int seconds, int as_bulk_job) { const char *job_argv[2]; drmaa_job_template_t *jt = NULL; char buffer[100]; if (drmaa_allocate_job_template(&jt, NULL, 0)!=DRMAA_ERRNO_SUCCESS) return NULL; /* run in users home directory */ drmaa_set_attribute(jt, DRMAA_WD, DRMAA_PLACEHOLDER_HD, NULL, 0); /* the job to be run */ drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, job_path, NULL, 0); /* the job's arguments */ sprintf(buffer, "%d", seconds); job_argv[0] = buffer; job_argv[1] = NULL; drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0); /* join output/error file */ drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0); /* path for output */ if (!as_bulk_job) drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":"DRMAA_PLACEHOLDER_HD"/DRMAA_JOB", NULL, 0); else drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":"DRMAA_PLACEHOLDER_HD"/DRMAA_JOB."DRMAA_PLACEHOLDER_INCR, NULL, 0); return jt; }
static drmaa_job_template_t *create_job_template(const char *job_path, const char *job_name, const char *pred_name, int seconds, int hold) { drmaa_job_template_t *jt = NULL; int drmaa_errno; char buf[512]; const char *job_argv[2]; if (drmaa_allocate_job_template(&jt, NULL, 0) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "error: failed to create job template %s.\n", job_name); exit(1); } /* the job to be run */ drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, job_path, NULL, 0); /* join output/error file */ drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0); /* path for output */ drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":/dev/null", NULL, 0); /* job name for hold_jid_ad list */ drmaa_set_attribute(jt, DRMAA_JOB_NAME, job_name, NULL, 0); /* drmaa_run_bulk_job(3) must accept -hold_jid_ad wc_job_list when passed through job template attribute drmaa_native_specification */ if (hold) { strcpy(buf, "-h "); } if (pred_name != NULL) { if (strlen(pred_name) > 256) { fprintf(stderr, "error: predecessor name too long!\n"); exit(1); } strcpy(buf, "-hold_jid_ad "); strcat(buf, pred_name); } if (hold || pred_name != NULL) { drmaa_errno = drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, buf, errorbuf, sizeof(errorbuf)-1); if(drmaa_errno != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_set_attribute failed: %s\n", errorbuf); exit(1); } } /* control job sleep time */ sprintf(buf, "%d", seconds); job_argv[0] = buf; job_argv[1] = NULL; drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, job_argv, NULL, 0); return jt; }
/*
 * Allocate a job template into *jt and configure it to run "/bin/ls -l -a"
 * in the current working directory under the job name "ht3".
 *
 * stdout and stderr go to "stdout." / "stderr." followed by the job-id
 * placeholder DRMAA_GW_JOB_ID.
 *
 * Terminates the process with exit(-1) when the template cannot be
 * allocated or the current directory cannot be determined.
 */
void setup_job_template( drmaa_job_template_t **jt)
{
    const char *ls_args[3] = {"-l", "-a", NULL};
    char        errbuf[DRMAA_ERROR_STRING_BUFFER];
    char        workdir[DRMAA_ATTR_BUFFER];
    int         status;

    status = drmaa_allocate_job_template(jt, errbuf, DRMAA_ERROR_STRING_BUFFER);
    if (status != DRMAA_ERRNO_SUCCESS) {
        fprintf(stderr,"drmaa_allocate_job_template() failed: %s\n", errbuf);
        exit(-1);
    }

    if (getcwd(workdir, DRMAA_ATTR_BUFFER) == NULL) {
        perror("Error getting current working directory");
        exit(-1);
    }

    /*
     * The return codes below should be checked; the checks are left out
     * for the sake of clarity.
     */
    status = drmaa_set_attribute(*jt, DRMAA_WD, workdir,
                                 errbuf, DRMAA_ERROR_STRING_BUFFER);

    status = drmaa_set_attribute(*jt, DRMAA_JOB_NAME, "ht3",
                                 errbuf, DRMAA_ERROR_STRING_BUFFER);

    status = drmaa_set_attribute(*jt, DRMAA_REMOTE_COMMAND, "/bin/ls",
                                 errbuf, DRMAA_ERROR_STRING_BUFFER);

    status = drmaa_set_vector_attribute(*jt, DRMAA_V_ARGV, ls_args,
                                        errbuf, DRMAA_ERROR_STRING_BUFFER);

    status = drmaa_set_attribute(*jt, DRMAA_OUTPUT_PATH, "stdout." DRMAA_GW_JOB_ID,
                                 errbuf, DRMAA_ERROR_STRING_BUFFER);

    status = drmaa_set_attribute(*jt, DRMAA_ERROR_PATH, "stderr." DRMAA_GW_JOB_ID,
                                 errbuf, DRMAA_ERROR_STRING_BUFFER);
}
int main (int argc, char **argv) { char error[DRMAA_ERROR_STRING_BUFFER]; int errnum = 0; drmaa_job_template_t *jt = NULL; errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); return 1; } errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not create job template: %s\n", error); } else { errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh", error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not set attribute \"%s\": %s\n", DRMAA_REMOTE_COMMAND, error); } else { const char *args[2] = {"60", NULL}; errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, DRMAA_ERROR_STRING_BUFFER); } if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not set attribute \"%s\": %s\n", DRMAA_REMOTE_COMMAND, error); } else { char jobid[DRMAA_JOBNAME_BUFFER]; errnum = drmaa_run_job (jobid, DRMAA_JOBNAME_BUFFER, jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not submit job: %s\n", error); } else { int status = 0; printf ("Your job has been submitted with id %s\n", jobid); sleep (20); errnum = drmaa_job_ps (jobid, &status, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not get job' status: %s\n", error); } else { switch (status) { case DRMAA_PS_UNDETERMINED: printf ("Job status cannot be determined\n"); break; case DRMAA_PS_QUEUED_ACTIVE: printf ("Job is queued and active\n"); break; case DRMAA_PS_SYSTEM_ON_HOLD: printf ("Job is queued and in system hold\n"); break; case DRMAA_PS_USER_ON_HOLD: printf ("Job is queued and in user hold\n"); break; case DRMAA_PS_USER_SYSTEM_ON_HOLD: printf ("Job is queued and in user and system hold\n"); break; case DRMAA_PS_RUNNING: printf ("Job is running\n"); break; case 
DRMAA_PS_SYSTEM_SUSPENDED: printf ("Job is system suspended\n"); break; case DRMAA_PS_USER_SUSPENDED: printf ("Job is user suspended\n"); break; case DRMAA_PS_USER_SYSTEM_SUSPENDED: printf ("Job is user and system suspended\n"); break; case DRMAA_PS_DONE: printf ("Job finished normally\n"); break; case DRMAA_PS_FAILED: printf ("Job finished, but failed\n"); break; } /* switch */ } /* else */ } /* else */ } /* else */ errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not delete job template: %s\n", error); } } /* else */ errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); return 1; } return 0; }
void *run(void *arg) { int ret = DRMAA_ERRNO_SUCCESS; char error[DRMAA_ERROR_STRING_BUFFER + 1]; drmaa_job_template_t *jt = NULL; int run = ((int *)arg)[0]; int thread = ((int *)arg)[1]; char jobid[DRMAA_JOBNAME_BUFFER + 1]; int queued = 1; int running = 0; int status = -1; free(arg); ret = drmaa_allocate_job_template(&jt, error, DRMAA_ERROR_STRING_BUFFER); if (handle_code(ret, error, run, thread) == 1) { return NULL; } ret = drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, CMD, error, DRMAA_ERROR_STRING_BUFFER); if (handle_code(ret, error, run, thread) == 1) { return NULL; } ret = drmaa_set_attribute(jt, DRMAA_WD, WD, error, DRMAA_ERROR_STRING_BUFFER); if (handle_code(ret, error, run, thread) == 1) { return NULL; } ret = drmaa_set_attribute(jt, DRMAA_JOB_CATEGORY, CATEGORY, error, DRMAA_ERROR_STRING_BUFFER); if (handle_code(ret, error, run, thread) == 1) { return NULL; } printf("%d %d SETUP complete %ld\n", run, thread, time(NULL)); ret = drmaa_run_job(jobid, DRMAA_JOBNAME_BUFFER, jt, error, DRMAA_ERROR_STRING_BUFFER); if (handle_code(ret, error, run, thread) == 1) { return NULL; } printf("%d %d SUBMITTED jobid: %s %ld\n", run, thread, jobid, time(NULL)); ret = drmaa_delete_job_template(jt, error, DRMAA_ERROR_STRING_BUFFER); handle_code(ret, error, run, thread); while (queued) { ret = drmaa_wait(jobid, NULL, 0, NULL, 2, NULL, error, DRMAA_ERROR_STRING_BUFFER); if (ret != DRMAA_ERRNO_EXIT_TIMEOUT) { if (handle_code(ret, error, run, thread) == 1) { return NULL; } } else { printf ("%d %d TIMEOUT jobid: %s %ld\n", run, thread, jobid, time (NULL)); } ret = drmaa_job_ps(jobid, &status, error, DRMAA_ERROR_STRING_BUFFER); if (handle_code(ret, error, run, thread) == 1) { return NULL; } queued = (status == DRMAA_PS_QUEUED_ACTIVE) || (status == DRMAA_PS_SYSTEM_ON_HOLD) || (status == DRMAA_PS_SYSTEM_ON_HOLD) || (status == DRMAA_PS_USER_ON_HOLD) || (status == DRMAA_PS_USER_SYSTEM_ON_HOLD); } printf("%d %d RUNNING jobid: %s %ld\n", run, thread, jobid, time(NULL)); running = 1; 
while (running == 1) { ret = drmaa_wait(jobid, NULL, 0, NULL, 60, NULL, error, DRMAA_ERROR_STRING_BUFFER); if (ret != DRMAA_ERRNO_EXIT_TIMEOUT) { if (handle_code(ret, error, run, thread) == 1) { return NULL; } running = 0; printf("%d %d FINISHED jobid: %s %ld\n", run, thread, jobid, time(NULL)); } else { printf ("%d %d TIMEOUT jobid: %s %ld\n", run, thread, jobid, time (NULL)); ret = drmaa_job_ps(jobid, &status, error, DRMAA_ERROR_STRING_BUFFER); if (handle_code(ret, error, run, thread) == 1) { return NULL; } if (status != DRMAA_PS_RUNNING) { running = 0; printf("%d %d HUNG jobid: %s %ld\n", run, thread, jobid, time(NULL)); } } } return NULL; }
static drmaa_job_template_t *create_job_template(const char *job_path, const char *project, int i) { drmaa_job_template_t *jt = NULL; char buffer[10240]; if (drmaa_allocate_job_template(&jt, NULL, 0)!=DRMAA_ERRNO_SUCCESS) return NULL; /* run in users home directory */ drmaa_set_attribute(jt, DRMAA_WD, DRMAA_PLACEHOLDER_HD, NULL, 0); if (scenario) { if (!strcmp(scenario, "queue")) { if (!strcmp(site_b, "none")) sprintf(buffer, "-P %s -l APP%d=1 -q all.q", project, i); else if (!strcmp(site_b, "hostgroup")) sprintf(buffer, "-P %s -l APP%d=1 -q all.q@@site_b", project, i); else if (!strcmp(site_b, "resource")) sprintf(buffer, "-P %s -l APP%d=1,site=b -q all.q", project, i); else if (!strcmp(site_b, "softresource")) sprintf(buffer, "-P %s -l APP%d=1 -q all.q -soft -l site=b", project, i); else /* "softhostgroup" */ sprintf(buffer, "-P %s -l APP%d=1 -q all.q -soft -q *@@site_b", project, i); } else if (!strcmp(scenario, "type")) { if (!strcmp(site_b, "none")) sprintf(buffer, "-P %s -l APP%d=1,type=all", project, i); else if (!strcmp(site_b, "hostgroup")) sprintf(buffer, "-P %s -l APP%d=1,type=all -q *@@site_b", project, i); else if (!strcmp(site_b, "resource")) sprintf(buffer, "-P %s -l APP%d=1,type=all,site=b", project, i); else if (!strcmp(site_b, "softresource")) sprintf(buffer, "-P %s -l APP%d=1,type=all -soft -l site=b", project, i); else /* "softhostgroup" */ sprintf(buffer, "-P %s -l APP%d=1,type=all -soft -q *@@site_b", project, i); } else if (!strcmp(scenario, "number")) { if (!strcmp(site_b, "none")) sprintf(buffer, "-P %s -l APP%d=1,number=24", project, i); else if (!strcmp(site_b, "hostgroup")) sprintf(buffer, "-P %s -l APP%d=1,number=24 -q *@@site_b", project, i); else if (!strcmp(site_b, "resource")) sprintf(buffer, "-P %s -l APP%d=1,number=24,site=b", project, i); else if (!strcmp(site_b, "softresource")) sprintf(buffer, "-P %s -l APP%d=1,number=24 -soft -l site=b", project, i); else /* "softhostgroup" */ sprintf(buffer, "-P %s -l APP%d=1,number=24 -soft 
-q *@@site_b", project, i); } else if (!strcmp(scenario, "pe")) { if (!strcmp(site_b, "none")) sprintf(buffer, "-P %s -l APP%d=1 -pe pe1 1", project, i); else if (!strcmp(site_b, "hostgroup")) sprintf(buffer, "-P %s -l APP%d=1 -q *@@site_b -pe pe1 1", project, i); else if (!strcmp(site_b, "resource")) sprintf(buffer, "-P %s -l APP%d=1,site=b -pe pe1 1", project, i); else if (!strcmp(site_b, "softresource")) sprintf(buffer, "-P %s -l APP%d=1 -pe pe1 1 -soft -l site=b", project, i); else /* "softhostgroup" */ sprintf(buffer, "-P %s -l APP%d=1 -pe pe1 1 -soft -q *@@site_b", project, i); } else { /* "none" */ sprintf(buffer, "-l APP%d=1", i); } if (native_spec) { strcat(buffer, " "); strcat(buffer, native_spec); } } else strcpy(buffer, native_spec); /* printf("### native spec \"%s\"\n", buffer); */ drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, buffer, NULL, 0); /* the job to be run */ drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, job_path, NULL, 0); /* the job's arguments if any */ if (job_args) drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, (const char **)job_args, NULL, 0); /* join output/error file */ drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0); /* submit job using euid/egid */ if (!getuid()) { drmaa_set_attribute(jt, DRMAA_SUBMIT_AS_EUID, "y", NULL, 0); } /* path for output */ drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, ":/dev/null", NULL, 0); return jt; }
int launch (const char* script, const char* arg_vec[], int num_tasks) { char error[DRMAA_ERROR_STRING_BUFFER]; int errnum = 0; drmaa_job_template_t *jt = NULL; errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); return 1; } errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not create job template: %s\n", error); } else { errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, script, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not set attribute \"%s\": %s\n", DRMAA_REMOTE_COMMAND, error); } else { errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, arg_vec, error, DRMAA_ERROR_STRING_BUFFER); } if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not set attribute \"%s\": %s\n", DRMAA_REMOTE_COMMAND, error); }else { drmaa_job_ids_t *ids = NULL; errnum = drmaa_run_bulk_jobs (&ids, jt, 1, num_tasks, 1, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not submit job: %s\n", error); }else { char jobid[DRMAA_JOBNAME_BUFFER]; const char *jobids[2] = {DRMAA_JOB_IDS_SESSION_ALL, NULL}; while (drmaa_get_next_job_id (ids, jobid, DRMAA_JOBNAME_BUFFER) == DRMAA_ERRNO_SUCCESS) { printf ("A job task has been submitted with id %s\n", jobid); } errnum = drmaa_synchronize (jobids, DRMAA_TIMEOUT_WAIT_FOREVER, 1, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not wait for jobs: %s\n", error); }else { printf ("All job tasks have finished.\n"); } } drmaa_release_job_ids (ids); } /* else */ errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not delete job template: %s\n", error); } } /* else */ errnum = drmaa_exit (error, 
DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); return 1; } return 0; }
int main (int argc, char **argv) { char error[DRMAA_ERROR_STRING_BUFFER]; int errnum = 0; drmaa_job_template_t *jt = NULL; errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); return 1; } errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not create job template: %s\n", error); } else { errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh", error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not set attribute \"%s\": %s\n", DRMAA_REMOTE_COMMAND, error); } else { const char *args[2] = {"60", NULL}; errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, DRMAA_ERROR_STRING_BUFFER); } if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not set attribute \"%s\": %s\n", DRMAA_REMOTE_COMMAND, error); } else { char jobid[DRMAA_JOBNAME_BUFFER]; errnum = drmaa_run_job (jobid, DRMAA_JOBNAME_BUFFER, jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not submit job: %s\n", error); } else { printf ("Your job has been submitted with id %s\n", jobid); errnum = drmaa_control (jobid, DRMAA_CONTROL_TERMINATE, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not delete job: %s\n", error); } else { printf ("Your job has been deleted\n"); } } } /* else */ errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not delete job template: %s\n", error); } } /* else */ errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); return 1; } return 0; }
int main () { char error[DRMAA_ERROR_STRING_BUFFER]; int errnum = 0; char jobid[DRMAA_JOBNAME_BUFFER]; char jobid_2[DRMAA_JOBNAME_BUFFER]; /* Init Session */ /* drmaa_init | oar_connect */ errnum = drmaa_init ("localhost", error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't init DRMAA library: %s\n", error); return 1; } /* Do Stuff */ /*instance handling*/ //instance_handling(); /* Allocate Job Template */ drmaa_job_template_t *jt = NULL; errnum = drmaa_allocate_job_template (&jt, error,DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't allocate job template: %s\n", error); return 1; } char cmd1[]="/bin/sleep"; char cmd2[]="exit"; /* Job Templates */ errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, cmd2, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't set remote command: %s\n", error); return 1; } /* set HOLD state at submission */ errnum = drmaa_set_attribute(jt, DRMAA_JS_STATE, DRMAA_SUBMISSION_STATE_HOLD, NULL, 0); const char *args[2] = {"5", NULL}; errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't set remote command args: %s\n", error); return 1; } /* Run Job */ errnum = drmaa_run_job (jobid, DRMAA_JOBNAME_BUFFER, jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't run job: %s\n", error); return 1; } else { printf ("Your job has been submitted with id %s\n", jobid); } /* Run Job */ /* errnum = drmaa_run_job (jobid_2, DRMAA_JOBNAME_BUFFER, jt, error, DRMAA_ERROR_STRING_BUFFER); */ /* Get Job State */ /** * The possible values of * a program's staus are: * - DRMAA_PS_UNDETERMINED * - DRMAA_PS_QUEUED_ACTIVE * - DRMAA_PS_SYSTEM_ON_HOLD * - DRMAA_PS_USER_ON_HOLD * - DRMAA_PS_USER_SYSTEM_ON_HOLD * - DRMAA_PS_RUNNING * - DRMAA_PS_SYSTEM_SUSPENDED * - 
DRMAA_PS_USER_SUSPENDED * - DRMAA_PS_DONE * - DRMAA_PS_FAILED * Terminated jobs have a status of DRMAA_PS_FAILED. */ /* int remote_ps; errnum = drmaa_job_ps(jobid, &remote_ps, error, DRMAA_ERROR_STRING_BUFFER); printf("drmaa_job_ps: job_id: %s job_ps: %d\n",jobid,remote_ps); errnum = drmaa_job_ps(jobid, &remote_ps, error, DRMAA_ERROR_STRING_BUFFER); printf("2-drmaa_job_ps: job_id: %s job_ps: %d\n",jobid,remote_ps); */ /* Conrol Job */ /* * - DRMAA_CONTROL_SUSPEND 0 * - DRMAA_CONTROL_RESUME 1 * - DRMAA_CONTROL_HOLD 2 * - DRMAA_CONTROL_RELEASE 3 * - DRMAA_CONTROL_TERMINATE 4 */ /* Delete Job */ /* int i; for(i=0;i<0;i++) { errnum = drmaa_control(jobid, i, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't drmaa_control job: %s\n", error); } else { const char *str = drmaa_control_to_str(i); printf("drmaa_control: job_id: %s action: %s\n", jobid, str); } } */ sleep(7); printf("drmaa_wait \n"); int stat; drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY, jobid, sizeof(jobid)-1, &stat, 20, NULL, NULL, 0); printf("drmaa_wait JobId: %s\n", jobid); /* const char **job_ids = NULL; job_ids = calloc( 2, sizeof(char *) ); job_ids[0]=jobid; printf("drmaa_synchronize\n"); errnum = drmaa_synchronize(job_ids, 200, 0, error, DRMAA_ERROR_STRING_BUFFER); */ /* TODO free fsb_free_vector(job_ids); */ if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't drmaa_synchronize: %s\n", error); } /* Delete Job Template*/ errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't delete job template: %s\n", error); return 1; } /* Exit Session */ /* drmaa_exit | oar_disconnect */ errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); if (errnum != DRMAA_ERRNO_SUCCESS) { fprintf (stderr, "Couldn't exit DRMAA library: %s\n", error); return 1; } return 0; /* return drmaa_init("hello"); return drmaa_exit() ; return 0; */ }
int main(int argc, char *argv[]) { const char *filename; int ret = 0; char diagnosis[DRMAA_ERROR_STRING_BUFFER]; char jobwd[1024*4]; char jobid[1024]; char line[2*1024]; int drmaa_errno; drmaa_job_template_t *jt = NULL; int stat; int aborted, exited, exit_status, signaled; int j, njobs = 0; FILE *fp; struct sigaction sa; /* clear job info structure */ clear_all_job_info(); /* setup a signal handler for shutdown */ memset(&sa, 0, sizeof(sa)); sa.sa_handler = my_compile_signal_handler; /* one handler for all signals */ sigemptyset(&sa.sa_mask); sigaction(SIGINT, &sa, NULL); sigaction(SIGTERM, &sa, NULL); sigaction(SIGHUP, &sa, NULL); sigaction(SIGPIPE, &sa, NULL); /* we can override use of a compile.conf in cwd by environment */ filename = getenv("RAIMK_COMPILE_CONF"); if (filename == NULL) { filename = "compile.conf"; } /* we'll start the job in the cwd */ if (!getcwd(jobwd, sizeof(jobwd)-1)) { fprintf(stderr, "getcwd() failed: %s\n", strerror(errno)); ret = 2; goto Finish; } /* try to open config file */ if (!(fp = fopen(filename, "r"))) { fprintf(stderr, "fopen(\"compile.conf\") failed: %s\n", strerror(errno)); ret = 2; goto Finish; } /* initialize a drmaa session */ if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis); fclose(fp); return 2; } printf("--- start cluster session --------------------------------\n"); /* parse the config file and start a job for every architecture */ while (fscanf(fp, "%[^\n]\n", line) == 1) { char nat_spec[1024]; char arch[1024]; char name[1024]; char ns[1024]; char output_file[1024]; char dummy[1024]; /* skip comment lines */ if (line[0] == '#') continue; if (sscanf(line, "%[^\t ]%[\t ]%[^\n]\n", arch, dummy, ns) != 3) { fprintf(stderr, "parsing error in compile.conf\n"); continue; } sprintf(name, "build %s", arch); /* build job template */ if (drmaa_allocate_job_template(&jt, NULL, 0)!=DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_run_job() failed: 
%s\n", diagnosis); ret = 2; goto Finish; } drmaa_set_attribute(jt, DRMAA_WD, jobwd, NULL, 0); drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, argv[1], NULL, 0); drmaa_set_attribute(jt, DRMAA_JOIN_FILES, "y", NULL, 0); drmaa_set_attribute(jt, DRMAA_JOB_NAME, name, NULL, 0); sprintf(nat_spec, "-b no -S /bin/sh %s", ns); drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, nat_spec, NULL, 0); sprintf(output_file, ":%s/build_%s.log", jobwd, arch); drmaa_set_attribute(jt, DRMAA_OUTPUT_PATH, output_file, NULL, 0); drmaa_set_vector_attribute(jt, DRMAA_V_ARGV, (const char **)&argv[2], NULL, 0); /* submit job */ if ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)) != DRMAA_ERRNO_SUCCESS) { drmaa_delete_job_template(jt, NULL, 0); if (drmaa_errno == DRMAA_ERRNO_DENIED_BY_DRM) { printf("--- job \"%s\" using \"%s\" wasn't accepted: %s\n", name, ns, diagnosis); continue; } fprintf(stderr, "drmaa_run_job() failed: %s\n", diagnosis); ret = 2; goto Finish; } /* remember job information */ job[njobs].jid = strdup(jobid); job[njobs].name = strdup(name); job[njobs].ns = strdup(ns); job[njobs].output_file = strdup(output_file); njobs++; drmaa_delete_job_template(jt, NULL, 0); printf(" submitted job \"%s\" as job %s\n", name, jobid); } fclose(fp); /* monitor jobs, until all have finished */ while (number_of_jobs() > 0) { /* We wait with timeout to be able to react on events like CTRL-C */ drmaa_errno = drmaa_wait(DRMAA_JOB_IDS_SESSION_ANY, jobid, sizeof(jobid)-1, &stat, 1, NULL, diagnosis, sizeof(diagnosis)-1); /* error */ if (drmaa_errno != DRMAA_ERRNO_SUCCESS && drmaa_errno != DRMAA_ERRNO_EXIT_TIMEOUT) { fprintf(stderr, "drmaa_wait() failed: %s\n", diagnosis); ret = 2; goto Finish; } /* user pressed CTRL-C: delete all jobs */ if (terminate_session) { printf("--- shutdown requested --------------------------------\n"); delete_all_jobs(); } /* if user pressed CTRL-C multiple times, exit */ if (terminate_program) { printf("--- forced shutdown 
-----------------------------------\n"); goto Finish; } /* * a job terminated - evaluate return codes and deregister job from * our internal bookkeeping */ if (drmaa_errno == DRMAA_ERRNO_SUCCESS) { j = search_job(jobid); if (j < 0) { fprintf(stderr, "drmaa_wait() returns unknown job ... ?\n"); } /* report how job finished */ drmaa_wifaborted(&aborted, stat, NULL, 0); if (aborted) { printf("--- run \"%s\" stopped or never started\n", job[j].name); } else { int failed = 1; char *path = job[j].output_file + 1; drmaa_wifexited(&exited, stat, NULL, 0); if (exited) { drmaa_wexitstatus(&exit_status, stat, NULL, 0); if (exit_status == 0) { printf("+++ run \"%s\" was successful\n", job[j].name); failed = 0; } else { printf("### run \"%s\" broken ##################################\n", job[j].name); ret = 1; } } else { drmaa_wifsignaled(&signaled, stat, NULL, 0); if (signaled) { char termsig[DRMAA_SIGNAL_BUFFER+1]; drmaa_wtermsig(termsig, DRMAA_SIGNAL_BUFFER, stat, NULL, 0); printf("job \"%s\" finished due to signal %s\n", job[j].name, termsig); } else { printf("job \"%s\" finished with unclear conditions\n", job[j].name); } } /* * If a job succeeded, we delete its output file. * If it failed, we show the end of the output file. */ if (failed) { char tail_cmd[1024]; sprintf(tail_cmd, "tail -15 %s", path); ret = system(tail_cmd); } else { if (unlink(path) != 0) { fprintf(stderr, "couldn't unlink \"%s\" job output file %s: %s\n", job[j].name, path, strerror(errno)); } } } /* clean the job struct */ clear_job_info(j); } } printf("--- end cluster session --------------------------------\n"); Finish: if (drmaa_exit(diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { fprintf(stderr, "drmaa_exit() failed: %s\n", diagnosis); return 1; } return ret; }