// if status = running, and current_time > sim_start + max_confirm_wait // (usually 2 min), check if job is confirmed running (status_file exists). // If not confirmed, set job to JOB_QUEUE_FAILED. bool job_queue_node_update_status( job_queue_node_type * node , job_queue_status_type * status , queue_driver_type * driver ) { bool status_change = false; pthread_mutex_lock(&node->data_mutex); { if (node->job_data) { job_status_type current_status = job_queue_node_get_status(node); bool confirmed = job_queue_node_status_update_confirmed_running__(node); if ((current_status & JOB_QUEUE_RUNNING) && !confirmed) { // it's running, but not confirmed running. double runtime = job_queue_node_time_since_sim_start(node); if (runtime >= node->max_confirm_wait) { // max_confirm_wait has passed since sim_start without success; the job is dead job_status_type new_status = JOB_QUEUE_EXIT; status_change = job_queue_status_transition(status, current_status, new_status); job_queue_node_set_status(node, new_status); } } current_status = job_queue_node_get_status(node); if (current_status & JOB_QUEUE_CAN_UPDATE_STATUS) { job_status_type new_status = queue_driver_get_status( driver , node->job_data); status_change = job_queue_status_transition(status , current_status , new_status); job_queue_node_set_status(node,new_status); } } } pthread_mutex_unlock( &node->data_mutex ); return status_change; }
/*
  Kill the job held by @node on request from the user.

  The function holds node->data_mutex for its entire duration. If the
  current node status is not among the JOB_QUEUE_CAN_KILL states
  nothing is done and false is returned. Otherwise the node - and the
  counters in @status - are moved to JOB_QUEUE_USER_KILLED and true is
  returned.

  The driver is only involved when the node actually holds driver
  specific job data: in that case the job is killed through @driver,
  the job data is released with queue_driver_free_job() and
  node->job_data is reset to NULL.
*/
bool job_queue_node_kill( job_queue_node_type * node , job_queue_status_type * status , queue_driver_type * driver) {
  bool result = false;
  pthread_mutex_lock( &node->data_mutex );
  {
    job_status_type current_status = job_queue_node_get_status( node );
    if (current_status & JOB_QUEUE_CAN_KILL) {
      /*
        Jobs with status JOB_QUEUE_WAITING are killable - in the sense
        that status should be set to JOB_QUEUE_USER_KILLED; but they
        do not have any driver specific job_data, and the
        driver->kill_job() function can NOT be called.

        The job_data pointer is checked before *both* driver calls,
        so that the driver is never handed a NULL job handle.
      */
      if ((current_status != JOB_QUEUE_WAITING) && (node->job_data != NULL)) {
        queue_driver_kill_job( driver , node->job_data );
        queue_driver_free_job( driver , node->job_data );
        node->job_data = NULL;
      }
      job_queue_status_transition(status, current_status, JOB_QUEUE_USER_KILLED);
      job_queue_node_set_status( node , JOB_QUEUE_USER_KILLED);
      result = true;
    }
  }
  pthread_mutex_unlock( &node->data_mutex );
  return result;
}
/*
  Put @node back in the JOB_QUEUE_WAITING state.

  With node->data_mutex held, the transition from the current node
  status to JOB_QUEUE_WAITING is registered in @status, the node
  status is set to JOB_QUEUE_WAITING and finally
  job_queue_node_reset_submit_attempt() is called on the node.
*/
void job_queue_node_restart( job_queue_node_type * node , job_queue_status_type * status) {
  pthread_mutex_lock( &node->data_mutex );

  job_queue_status_transition( status , job_queue_node_get_status( node ) , JOB_QUEUE_WAITING );
  job_queue_node_set_status( node , JOB_QUEUE_WAITING );
  job_queue_node_reset_submit_attempt( node );

  pthread_mutex_unlock( &node->data_mutex );
}
/*
  Move @node to @new_status, with node->data_mutex held.

  The transition from the current node status to @new_status is first
  passed to job_queue_status_transition(); the status stored on the
  node is only updated when that call returns true. The value returned
  by job_queue_status_transition() is returned to the caller.
*/
bool job_queue_node_status_transition( job_queue_node_type * node , job_queue_status_type * status , job_status_type new_status) {
  pthread_mutex_lock( &node->data_mutex );

  const job_status_type previous = job_queue_node_get_status( node );
  const bool changed = job_queue_status_transition( status , previous , new_status );
  if (changed) {
    job_queue_node_set_status( node , new_status );
  }

  pthread_mutex_unlock( &node->data_mutex );
  return changed;
}
/*
  Ask the driver for the current status of the job held by @node.

  The query is only made when the node has driver specific job data
  (node->job_data is set) and the current node status is one of the
  JOB_QUEUE_CAN_UPDATE_STATUS states. The status reported by the
  driver is registered in @status and stored on the node; the whole
  operation is performed with node->data_mutex held.

  Returns the result of job_queue_status_transition(), or false if the
  driver was not queried.
*/
bool job_queue_node_update_status( job_queue_node_type * node , job_queue_status_type * status , queue_driver_type * driver) {
  bool changed = false;

  pthread_mutex_lock( &node->data_mutex );
  if (node->job_data != NULL) {
    const job_status_type status_now = job_queue_node_get_status( node );

    if (status_now & JOB_QUEUE_CAN_UPDATE_STATUS) {
      const job_status_type driver_status = queue_driver_get_status( driver , node->job_data );

      changed = job_queue_status_transition( status , status_now , driver_status );
      job_queue_node_set_status( node , driver_status );
    }
  }
  pthread_mutex_unlock( &node->data_mutex );

  return changed;
}
/*
  Submit the job described by @node through @driver.

  The call to queue_driver_submit_job() is made before
  node->data_mutex is taken; only the bookkeeping which follows is
  done with the mutex held.

  Returns SUBMIT_OK when the driver returned job data and
  SUBMIT_DRIVER_FAIL when the driver returned NULL.
*/
submit_status_type job_queue_node_submit( job_queue_node_type * node , job_queue_status_type * status , queue_driver_type * driver) {
  submit_status_type outcome;
  void * driver_data = queue_driver_submit_job( driver ,
                                                node->run_cmd ,
                                                node->num_cpu ,
                                                node->run_path ,
                                                node->job_name ,
                                                node->argc ,
                                                (const char **) node->argv );

  pthread_mutex_lock( &node->data_mutex );
  if (driver_data == NULL) {
    /*
      The node is left untouched, i.e. the status of the job is
      still WAITING and a new submit attempt will be made in the
      next round.
    */
    outcome = SUBMIT_DRIVER_FAIL;
  } else {
    const job_status_type previous = node->job_status;

    node->job_data = driver_data;
    node->submit_attempt++;

    /*
      JOB_QUEUE_SUBMITTED is an internal status which is not exported
      anywhere; job_queue_update_status() will replace it with
      PENDING or RUNNING at the next call. What separates SUBMITTED
      from WAITING is that a SUBMITTED node has job_data != NULL, so
      the job_queue_node free function must be called on it.
    */
    job_queue_node_set_status( node , JOB_QUEUE_SUBMITTED );
    job_queue_status_transition( status , previous , JOB_QUEUE_SUBMITTED );
    outcome = SUBMIT_OK;
  }
  pthread_mutex_unlock( &node->data_mutex );

  return outcome;
}
/*
  Kill the job held by @node on request from the user.

  Runs with node->data_mutex held. Returns false, without touching
  anything, when the current node status is not one of the
  JOB_QUEUE_CAN_KILL states. Otherwise the transition to
  JOB_QUEUE_USER_KILLED is registered in @status, the node status is
  set to JOB_QUEUE_USER_KILLED and true is returned.
*/
bool job_queue_node_kill( job_queue_node_type * node , job_queue_status_type * status , queue_driver_type * driver) {
  bool killed = false;

  pthread_mutex_lock( &node->data_mutex );

  const job_status_type status_now = job_queue_node_get_status( node );
  if (status_now & JOB_QUEUE_CAN_KILL) {
    /*
      A job which is killed before it has been started has not been
      assigned any driver specific job data; the driver is therefore
      only called when node->job_data is set.
    */
    if (node->job_data != NULL) {
      queue_driver_kill_job( driver , node->job_data );
      queue_driver_free_job( driver , node->job_data );
      node->job_data = NULL;
    }

    job_queue_status_transition( status , status_now , JOB_QUEUE_USER_KILLED );
    job_queue_node_set_status( node , JOB_QUEUE_USER_KILLED );
    killed = true;
  }

  pthread_mutex_unlock( &node->data_mutex );
  return killed;
}