char * kill_hard() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"normal_execution.jt"); // Submit job for hard kill if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Non Blocking kill rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_KILL_HARD, GW_FALSE); // Wait for job to disappear from GW do { rc = gw_client_job_status(job_id, &job_status); }while(rc!=GW_RC_FAILED_BAD_JOB_ID); sprintf(reason,"OK"); return reason; }
char * kill_sync() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"normal_execution.jt"); // Submit job for synchronous kill if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Blocking kill rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_KILL, GW_TRUE); // Check that it finished in the correct state if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_FAILED_BAD_JOB_ID) { sprintf(reason,"Wrong job state %s, the job shouldn't exist",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }
int main(int argc, char **argv) { int job_id[MAX_JOBS_WAIT]; int i; int number_of_jobs; int array_id = -1; int error = 0; char opt; int exit_code; int * exit_codes; int a = 0, A = 0, v = 0, k = 0; signed long timeout = -1; gw_client_t * gw_session; gw_boolean_t any = GW_FALSE; struct sigaction act; gw_return_code_t rc; /* ---------------------------------------------------------------- */ /* Parse arguments */ /* ---------------------------------------------------------------- */ opterr = 0; optind = 1; while((opt = getopt(argc,argv,":hkavt:A:"))!= -1) switch(opt) { case 'h': printf("%s", usage); exit(0); break; case 'v': v = 1; break; case 'a': a = 1; any = GW_TRUE; break; case 'A': A = 1; array_id = atoi(optarg); break; case 't': timeout = atoi(optarg); break; case 'k': k = 1; break; case '?': fprintf(stderr,"error: invalid option \'%c\'\n",optopt); printf("%s", susage); exit(1); break; case ':': fprintf(stderr,"error: must provide an argument for option \'%c\'\n", optopt); printf("%s", susage); exit(1); break; } /* ---------------------------------------------------------------- */ /* Connect to GWD */ /* ---------------------------------------------------------------- */ gw_session = gw_client_init(); if ( gw_session == NULL ) { fprintf(stderr,"Could not connect to gwd\n"); return (-1); } act.sa_handler = signal_handler; act.sa_flags = SA_RESTART; sigemptyset(&act.sa_mask); sigaction(SIGTERM||SIGINT,&act,NULL); /* ---------------------------------------------------------------- */ /* Set job id array */ /* ---------------------------------------------------------------- */ if (!A) { if (optind < argc) { number_of_jobs = 0; while ( optind < argc ) { job_id[number_of_jobs++] = atoi(argv[optind++]); if (number_of_jobs >= (MAX_JOBS_WAIT - 1) ) { fprintf(stderr,"FAILED: Max number of jobs reached\n"); error = 1; } } job_id[number_of_jobs] = -1; } else { printf("%s",susage); error = 1; } } else { rc = gw_client_job_status_all( ); if (rc == GW_RC_SUCCESS) { 
number_of_jobs = 0; for (i=0; i<gw_client.number_of_jobs; i++) if (gw_client.job_pool[i] != NULL) if (gw_client.job_pool[i]->array_id == array_id) { job_id[number_of_jobs++] = gw_client.job_pool[i]->id; if (number_of_jobs>=(MAX_JOBS_WAIT - 1)) { fprintf(stderr,"FAILED: Max number of jobs reached\n"); error = 1; } } job_id[number_of_jobs] = -1; if (number_of_jobs == 0) { fprintf(stderr,"FAILED: failed bad array id\n"); error = 1; } } else { fprintf(stderr,"FAILED: %s\n",gw_ret_code_string(rc)); error = 1; } } if (error) { gw_client_finalize(); return -1; } /* ---------------------------------------------------------------- */ /* Wait for the jobs */ /* ---------------------------------------------------------------- */ if ( number_of_jobs == 1 ) { rc = gw_client_wait(job_id[0], &exit_code, timeout); if ( rc == GW_RC_SUCCESS ) { if (v) { printf("%i\n",exit_code); } if (!k) { gw_client_job_signal(job_id[0],GW_CLIENT_SIGNAL_KILL,GW_FALSE); } } } else { rc = gw_client_wait_set(job_id,&exit_codes,any,timeout); if (rc == GW_RC_SUCCESS) { if (any) { if (v) { printf("%i: %i\n",job_id[0],exit_codes[0]); } if (!k) { gw_client_job_signal(job_id[0],GW_CLIENT_SIGNAL_KILL,GW_FALSE); } } else { for (i=0; i<number_of_jobs; i++) { if (v) { printf("%-4i: %i\n",job_id[i],exit_codes[i]); } if (!k) { gw_client_job_signal(job_id[i],GW_CLIENT_SIGNAL_KILL,GW_FALSE); } } } } free(exit_codes); } gw_client_finalize(); if ( rc != GW_RC_SUCCESS) { fprintf(stderr,"FAILED: %s\n",gw_ret_code_string(rc)); return -1; } else return 0; }
int main(int argc, char **argv) { int job_id = -1; int array_id = -1; char opt; int a = 0, k = 0, t = 0, o = 0, s = 0, r = 0, l = 0, A = 0, hard=0; gw_client_t * gw_session; gw_boolean_t blocking = GW_TRUE; gw_client_signal_t signal = GW_CLIENT_SIGNAL_KILL; struct sigaction act; gw_return_code_t rc; int mrc; /* ---------------------------------------------------------------- */ /* Parse arguments */ /* ---------------------------------------------------------------- */ opterr = 0; optind = 1; while((opt = getopt(argc, argv, ":haktosrl9A:")) != -1) switch(opt) { case 'a': a = 1; blocking = GW_FALSE; break; case 't': t = 1; signal = GW_CLIENT_SIGNAL_STOP; break; case 'o': o = 1; signal = GW_CLIENT_SIGNAL_HOLD; break; case 's': s = 1; signal = GW_CLIENT_SIGNAL_RESCHEDULE; break; case 'r': r = 1; signal = GW_CLIENT_SIGNAL_RESUME; break; case 'l': l = 1; signal = GW_CLIENT_SIGNAL_RELEASE; break; case 'k': k = 1; signal = GW_CLIENT_SIGNAL_KILL; break; case '9': hard = 1; signal = GW_CLIENT_SIGNAL_KILL_HARD; break; case 'A': A = 1; array_id = atoi(optarg); break; case 'h': printf("%s", usage); exit(0); break; case '?': fprintf(stderr,"error: invalid option \'%c\'\n",optopt); printf("%s", susage); exit(1); break; case ':': fprintf(stderr,"error: must provide an argument for option \'%c\'\n", optopt); printf("%s", susage); exit(1); break; } if ( t+o+s+r+l+k+hard > 1 ) { printf("%s", susage); exit(1); } if (!A) { if (!(optind < argc)) { printf("%s", susage); return -1; } } /* ---------------------------------------------------------------- */ /* Connect to GWD */ /* ---------------------------------------------------------------- */ gw_session = gw_client_init(); if ( gw_session == NULL ) { fprintf(stderr,"Could not connect to gwd\n"); return (-1); } act.sa_handler = signal_handler; act.sa_flags = SA_RESTART; sigemptyset(&act.sa_mask); sigaction(SIGTERM||SIGINT,&act,NULL); if (A) { rc = gw_client_array_signal (array_id, signal, blocking); if ( rc != GW_RC_SUCCESS) { 
fprintf(stderr,"FAILED: failed could not signal one or more jobs!\n"); mrc = -1; } else mrc = 0; } else { for (;optind<argc;optind++) { job_id = atoi(argv[optind]); rc = gw_client_job_signal (job_id, signal, blocking); if ( rc != GW_RC_SUCCESS) { fprintf(stderr,"FAILED: %s (job %i)\n", gw_ret_code_string(rc),job_id); mrc = -1; } } } gw_client_finalize(); return mrc; }
/*
 * Send `signal` to the jobs of array `array_id`, in two passes over the
 * client job pool (refreshed first with gw_client_job_status_all()):
 *   1. jobs in INIT, PENDING, HOLD, STOPPED, ZOMBIE or FAILED state;
 *   2. jobs in the prolog/wrapper/epilog/cancel/migration states listed in
 *      the second switch.
 * Jobs in any other state are not signalled.
 *
 * The pool mutex is held while the pool is scanned and released around each
 * gw_client_job_signal() call.
 *
 * Parameters:
 *   array_id  array whose jobs are signalled
 *   signal    signal to deliver
 *   blocking  forwarded unchanged to gw_client_job_signal()
 *
 * Returns:
 *   - the return code of gw_client_job_status_all() if the refresh fails;
 *   - GW_RC_FAILED_BAD_ARRAY_ID if no job of the array was signalled;
 *   - GW_RC_FAILED if at least one gw_client_job_signal() call failed;
 *   - GW_RC_SUCCESS otherwise.
 */
gw_return_code_t gw_client_array_signal (int                array_id,
                                         gw_client_signal_t signal,
                                         gw_boolean_t       blocking)
{
    int              i;
    int              array_exists = 0;
    gw_return_code_t rc;
    gw_return_code_t frc = GW_RC_SUCCESS;

    rc = gw_client_job_status_all( );

    if (rc != GW_RC_SUCCESS)
    {
        /* Report the real failure instead of masking it as a bad array id */
        return rc;
    }

    pthread_mutex_lock(&(gw_client.mutex));

    /* First make a FAST kill */

    for (i = 0; i < gw_client.number_of_jobs; i++)
    {
        if (gw_client.job_pool[i] == NULL)
        {
            continue;
        }

        if (gw_client.job_pool[i]->array_id != array_id)
        {
            continue;
        }

        switch (gw_client.job_pool[i]->job_state)
        {
            case GW_JOB_STATE_INIT:
            case GW_JOB_STATE_PENDING:
            case GW_JOB_STATE_HOLD:
            case GW_JOB_STATE_STOPPED:
            case GW_JOB_STATE_ZOMBIE:
            case GW_JOB_STATE_FAILED:

                /* NOTE(review): the job is addressed by its pool index;    */
                /* presumably the pool is indexed by job id - confirm.      */
                pthread_mutex_unlock(&(gw_client.mutex));

                rc = gw_client_job_signal(i, signal, blocking);

                pthread_mutex_lock(&(gw_client.mutex));

                if ( rc != GW_RC_SUCCESS )
                {
                    frc = GW_RC_FAILED;
                }

                array_exists = 1;
                break;

            default:
                break;
        }
    }

    /* kill the rest of the array */

    for (i = 0; i < gw_client.number_of_jobs; i++)
    {
        if (gw_client.job_pool[i] == NULL)
        {
            continue;
        }

        if (gw_client.job_pool[i]->array_id != array_id)
        {
            continue;
        }

        switch (gw_client.job_pool[i]->job_state)
        {
            case GW_JOB_STATE_PROLOG:
            case GW_JOB_STATE_PRE_WRAPPER:
            case GW_JOB_STATE_WRAPPER:
            case GW_JOB_STATE_EPILOG:
            case GW_JOB_STATE_EPILOG_STD:
            case GW_JOB_STATE_EPILOG_RESTART:
            case GW_JOB_STATE_EPILOG_FAIL:
            case GW_JOB_STATE_STOP_CANCEL:
            case GW_JOB_STATE_STOP_EPILOG:
            case GW_JOB_STATE_KILL_CANCEL:
            case GW_JOB_STATE_KILL_EPILOG:
            case GW_JOB_STATE_MIGR_CANCEL:
            case GW_JOB_STATE_MIGR_PROLOG:
            case GW_JOB_STATE_MIGR_EPILOG:

                pthread_mutex_unlock(&(gw_client.mutex));

                rc = gw_client_job_signal(i, signal, blocking);

                pthread_mutex_lock(&(gw_client.mutex));

                if ( rc != GW_RC_SUCCESS )
                {
                    frc = GW_RC_FAILED;
                }

                array_exists = 1;
                break;

            default:
                break;
        }
    }

    pthread_mutex_unlock(&(gw_client.mutex));

    if (array_exists == 0)
    {
        frc = GW_RC_FAILED_BAD_ARRAY_ID;
    }

    return frc;
}
char * migrate() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; gw_msg_job_t job_status; int exit_code; signed long timeout = -1; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"migration.jt"); // Submit job for migration if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Wait for wrapper state do { if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } sleep(1); }while(strcmp(gw_job_state_string(job_status.job_state),"wrap")!=0); // Send migrate signal rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_RESCHEDULE, GW_FALSE); printf(" Migrating job %d\n",job_id); fflush(NULL); // Wait for the job if((rc = gw_client_wait(job_id, &exit_code, timeout)!=GW_RC_SUCCESS)) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }
char * stop_resume() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; signed long timeout = -1; int exit_code; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"normal_execution.jt"); // Submit job printf(" Submitting job\n"); fflush(NULL); if((rc=gw_client_job_submit(jt,GW_JOB_STATE_PENDING,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Wait for wrapper state do { if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } sleep(1); }while(strcmp(gw_job_state_string(job_status.job_state),"wrap")!=0); // Stop the job rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_STOP, GW_TRUE); printf(" Job stopped\n"); fflush(NULL); // Wait for stop state do { if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } }while(strcmp(gw_job_state_string(job_status.job_state),"stop")!=0); // Resume the job rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_RESUME, GW_TRUE); printf(" Job resumed\n"); fflush(NULL); // Wait for the job to finish if((rc = gw_client_wait(job_id, &exit_code, timeout))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } // Check that it finished in the correct state if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); 
return reason; } sprintf(reason,"OK"); return reason; }
char * hold_release() { char *reason; char *jt; int job_id; int deps[GW_JT_DEPS]; int rc; signed long timeout = -1; int exit_code; gw_msg_job_t job_status; reason = malloc(sizeof(char)*200); jt = malloc(sizeof(char)*50); if ( gw_session == NULL ) { sprintf(reason,"Could not connect to gwd"); return reason; } deps[0]=-1; sprintf(jt,"normal_execution.jt"); // Submit job in hold if((rc=gw_client_job_submit(jt,GW_JOB_STATE_HOLD,&job_id,deps,GW_JOB_DEFAULT_PRIORITY))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_submit] %s",gw_ret_code_string(rc)); return reason; } // Check that it's being submitted in hold if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"hold")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } // Release it rc = gw_client_job_signal (job_id, GW_CLIENT_SIGNAL_RELEASE, GW_TRUE); // Wait for the job to finish if((rc = gw_client_wait(job_id, &exit_code, timeout))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_wait] %s",gw_ret_code_string(rc)); return reason; } if(exit_code!=0) { sprintf(reason,"Wrong exit code %d",exit_code); return reason; } // Check that it finished in the correct state if((rc = gw_client_job_status(job_id, &job_status))!=GW_RC_SUCCESS) { sprintf(reason,"[gw_client_job_status] %s",gw_ret_code_string(rc)); return reason; } else if(strcmp(gw_job_state_string(job_status.job_state),"done")!=0) { sprintf(reason,"Wrong job state %s",gw_job_state_string(job_status.job_state)); return reason; } sprintf(reason,"OK"); return reason; }