char * prolog_fail(int which_fail) { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; signed long timeout = -1; int exit_code; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; switch(which_fail) { case 0: sprintf(jt,"prolog_stdin_nr.jt"); break; case 1: sprintf(jt,"prolog_stdin_r.jt"); break; case 2: sprintf(jt,"prolog_input_nr.jt"); break; case 3: sprintf(jt,"prolog_ex_nr.jt"); break; } if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } free(jt); rc = gw_client_wait(job_id, &exit_code, timeout); if(rc==GW_RC_FAILED_JOB_FAIL) { sprintf(reason,"OK"); } else { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); } return reason; }
int main(int argc, char **argv) { int job_id[MAX_JOBS_WAIT]; int i; int number_of_jobs; int array_id = -1; int error = 0; char opt; int exit_code; int * exit_codes; int a = 0, A = 0, v = 0, k = 0; signed long timeout = -1; gw_client_t * gw_session; gw_boolean_t any = GW_FALSE; struct sigaction act; gw_return_code_t rc; /* ---------------------------------------------------------------- */ /* Parse arguments */ /* ---------------------------------------------------------------- */ opterr = 0; optind = 1; while((opt = getopt(argc,argv,":hkavt:A:"))!= -1) switch(opt) { case 'h': printf("%s", usage); exit(0); break; case 'v': v = 1; break; case 'a': a = 1; any = GW_TRUE; break; case 'A': A = 1; array_id = atoi(optarg); break; case 't': timeout = atoi(optarg); break; case 'k': k = 1; break; case '?': fprintf(stderr,"error: invalid option \'%c\'\n",optopt); printf("%s", susage); exit(1); break; case ':': fprintf(stderr,"error: must provide an argument for option \'%c\'\n", optopt); printf("%s", susage); exit(1); break; } /* ---------------------------------------------------------------- */ /* Connect to GWD */ /* ---------------------------------------------------------------- */ gw_session = gw_client_init(); if ( gw_session == NULL ) { fprintf(stderr,"Could not connect to gwd\n"); return (-1); } act.sa_handler = signal_handler; act.sa_flags = SA_RESTART; sigemptyset(&act.sa_mask); sigaction(SIGTERM||SIGINT,&act,NULL); /* ---------------------------------------------------------------- */ /* Set job id array */ /* ---------------------------------------------------------------- */ if (!A) { if (optind < argc) { number_of_jobs = 0; while ( optind < argc ) { job_id[number_of_jobs++] = atoi(argv[optind++]); if (number_of_jobs >= (MAX_JOBS_WAIT - 1) ) { fprintf(stderr,"FAILED: Max number of jobs reached\n"); error = 1; } } job_id[number_of_jobs] = -1; } else { printf("%s",susage); error = 1; } } else { rc = gw_client_job_status_all( ); if (rc == GW_RC_SUCCESS) { number_of_jobs = 0; for (i=0; i<gw_client.number_of_jobs; i++) if (gw_client.job_pool[i] != NULL) if (gw_client.job_pool[i]->array_id == array_id) { job_id[number_of_jobs++] = gw_client.job_pool[i]->id; if (number_of_jobs>=(MAX_JOBS_WAIT - 1)) { fprintf(stderr,"FAILED: Max number of jobs reached\n"); error = 1; } } job_id[number_of_jobs] = -1; if (number_of_jobs == 0) { fprintf(stderr,"FAILED: failed bad array id\n"); error = 1; } } else { fprintf(stderr,"FAILED: %s\n",gw_ret_code_string(rc)); error = 1; } } if (error) { gw_client_finalize(); return -1; } /* ---------------------------------------------------------------- */ /* Wait for the jobs */ /* ---------------------------------------------------------------- */ if ( number_of_jobs == 1 ) { rc = gw_client_wait(job_id[0], &exit_code, timeout); if ( rc == GW_RC_SUCCESS ) { if (v) { printf("%i\n",exit_code); } if (!k) { gw_client_job_signal(job_id[0],GW_CLIENT_SIGNAL_KILL,GW_FALSE); } } } else { rc = gw_client_wait_set(job_id,&exit_codes,any,timeout); if (rc == GW_RC_SUCCESS) { if (any) { if (v) { printf("%i: %i\n",job_id[0],exit_codes[0]); } if (!k) { gw_client_job_signal(job_id[0],GW_CLIENT_SIGNAL_KILL,GW_FALSE); } } else { for (i=0; i<number_of_jobs; i++) { if (v) { printf("%-4i: %i\n",job_id[i],exit_codes[i]); } if (!k) { gw_client_job_signal(job_id[i],GW_CLIENT_SIGNAL_KILL,GW_FALSE); } } } } free(exit_codes); } gw_client_finalize(); if ( rc != GW_RC_SUCCESS) { fprintf(stderr,"FAILED: %s\n",gw_ret_code_string(rc)); return -1; } else return 0; }
/** prewrapper != 0 then use prewrapper.jt **/ char * normal_execution(int prewrapper) { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; signed long timeout = -1; int exit_code; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; if(prewrapper) sprintf(jt,"pre_wrapper.jt"); else sprintf(jt,"normal_execution.jt"); if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } free(jt); // Wait for the job if((rc = gw_client_wait(job_id, &exit_code, timeout)!=GW_RC_SUCCESS)) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }
char * input_output() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; int exit_code; gw_msg_job_t job_status; struct stat buf; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"input_output_files.jt"); if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } free(jt); while((rc = gw_client_wait(job_id, &exit_code, -1)!=GW_RC_SUCCESS)) { printf(" Still waiting for job %d to finish.\n",job_id); fflush(NULL); sleep(2); } if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } // Check files were created correctly // Absolut path output if( stat("/tmp/test.txt.out",&buf) != 0 ) { sprintf(reason,"Absolut path output file not found"); return reason; } unlink("/tmp/test.txt.out"); // Relative path output if( stat("passwd.out",&buf) != 0 ) { sprintf(reason,"Relative path output file not found"); return reason; } unlink("passwd.out"); // gsiftp output if( stat("/tmp/passwd.gsi",&buf) != 0 ) { sprintf(reason,"Gsiftp output file not found"); return reason; } unlink("/tmp/passwd.gsi"); sprintf(reason,"OK"); return reason; }
char * wait_timeout(int _timeout) { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; signed long timeout=0; int exit_code; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"normal_execution.jt"); if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } free(jt); // by default, zerotimeout if(_timeout==0) timeout = 5; while((rc = gw_client_wait(job_id, &exit_code, timeout)!=GW_RC_SUCCESS)) { printf(" Still waiting for job %d to finish.\n",job_id); fflush(NULL); sleep(2); } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }
char * checkpoint(int which_jt) { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; gw_msg_job_t job_status; struct stat buf; int exit_code; signed long timeout = -1; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; switch(which_jt) { case 0: sprintf(jt,"checkpoint.jt"); break; case 1: sprintf(jt,"checkpoint_gsiftp.jt"); break; } // Submit job if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Wait for the job if((rc = gw_client_wait(job_id, &exit_code, timeout)!=GW_RC_SUCCESS)) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } if( stat("outfile",&buf) != 0 ) { sprintf(reason,"Checkpointing file not found"); return reason; } unlink("outfile"); sprintf(reason,"OK"); return reason; }
char * migrate() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; gw_msg_job_t job_status; int exit_code; signed long timeout = -1; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"migration.jt"); // Submit job for migration if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Wait for wrapper state do { if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } sleep(1); }while(strcmp(gw_job_state_string(job_status.job_state),"wrap")!=0); // Send migrate signal rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_RESCHEDULE, GW_FALSE); printf(" Migrating job %d\n",job_id); fflush(NULL); // Wait for the job if((rc = gw_client_wait(job_id, &exit_code, timeout)!=GW_RC_SUCCESS)) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }
char * stop_resume() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; signed long timeout = -1; int exit_code; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"normal_execution.jt"); // Submit job printf(" Submitting job\n"); fflush(NULL); if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Wait for wrapper state do { if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } sleep(1); }while(strcmp(gw_job_state_string(job_status.job_state),"wrap")!=0); // Stop the job rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_STOP, GW_TRUE); printf(" Job stopped\n"); fflush(NULL); // Wait for stop state do { if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } }while(strcmp(gw_job_state_string(job_status.job_state),"stop")!=0); // Resume the job rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_RESUME, GW_TRUE); printf(" Job resumed\n"); fflush(NULL); // Wait for the job to finish if((rc = gw_client_wait(job_id, &exit_code, timeout))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } // Check that it finished in the correct state if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }
char * hold_release() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; signed long timeout = -1; int exit_code; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"normal_execution.jt"); // Submit job in hold if((rc=gw_client_job_submit(jt,GW_JOB_STATE_HOLD,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Check that it's being submitted in hold if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"hold")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } // Release it rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_RELEASE, GW_TRUE); // Wait for the job to finish if((rc = gw_client_wait(job_id, &exit_code, timeout))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } // Check that it finished in the correct state if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }
char * execution_fail(int which_fail) { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; signed long timeout = -1; int exit_code; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; switch(which_fail) { case 0: sprintf(jt,"failed_execution_no_reschedule.jt"); break; case 1: sprintf(jt,"failed_execution_reschedule.jt"); break; } if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } free(jt); // Wait for the job if((rc = gw_client_wait(job_id, &exit_code, timeout))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=1) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }