static int _file_state(struct bcast_parameters *params) { /* validate the source file */ if ((fd = open(params->src_fname, O_RDONLY)) < 0) { error("Can't open `%s`: %s", params->src_fname, strerror(errno)); return SLURM_ERROR; } if (fstat(fd, &f_stat)) { error("Can't stat `%s`: %s", params->src_fname, strerror(errno)); return SLURM_ERROR; } verbose("modes = %o", (unsigned int) f_stat.st_mode); verbose("uid = %d", (int) f_stat.st_uid); verbose("gid = %d", (int) f_stat.st_gid); verbose("atime = %s", slurm_ctime2(&f_stat.st_atime)); verbose("mtime = %s", slurm_ctime2(&f_stat.st_mtime)); verbose("ctime = %s", slurm_ctime2(&f_stat.st_ctime)); verbose("size = %ld", (long) f_stat.st_size); if (!f_stat.st_size) { error("Warning: file `%s` is empty.", params->src_fname); return SLURM_SUCCESS; } src = mmap(NULL, f_stat.st_size, PROT_READ, MAP_SHARED, fd, 0); if (src == (void *) -1) { error("Can't mmap file `%s`, %m.", params->src_fname); return SLURM_ERROR; } return SLURM_SUCCESS; }
/* * scontrol_pid_info - given a local process id, print the corresponding * slurm job id and its expected end time * IN job_pid - the local process id of interest */ extern void scontrol_pid_info(pid_t job_pid) { int error_code; uint32_t job_id = 0; time_t end_time; long rem_time; error_code = slurm_pid2jobid(job_pid, &job_id); if (error_code) { exit_code = 1; if (quiet_flag != 1) slurm_perror ("slurm_pid2jobid error"); return; } error_code = slurm_get_end_time(job_id, &end_time); if (error_code) { exit_code = 1; if (quiet_flag != 1) slurm_perror ("slurm_get_end_time error"); return; } printf("Slurm job id %u ends at %s\n", job_id, slurm_ctime2(&end_time)); rem_time = slurm_get_rem_time(job_id); printf("slurm_get_rem_time is %ld\n", rem_time); return; }
/*
 * Job has been notified of its approaching time limit.
 * Job will be killed shortly after timeout.
 * This RPC can arrive multiple times with the same or updated timeouts.
 * FIXME: We may want to signal the job or perform other action for this.
 * FIXME: How much lead time do we want for this message?  Some jobs may
 * require tens of minutes to gracefully terminate.
 */
static void _timeout_handler(srun_timeout_msg_t *msg)
{
	static time_t last_timeout = 0;

	/* Suppress duplicate notifications of the same deadline. */
	if (msg->timeout == last_timeout)
		return;

	last_timeout = msg->timeout;
	verbose("job time limit to be reached at %s",
		slurm_ctime2(&msg->timeout));
}
/* Print the current date/time, either to stdout (command-line mode) or
 * into the curses text window. */
extern void print_date(void)
{
	time_t now_time = time(NULL);

	if (!params.commandline) {
		mvwprintw(text_win, main_ycord, main_xcord,
			  "%s", slurm_ctime2(&now_time));
		main_ycord++;
	} else {
		/* NOTE(review): this branch uses slurm_ctime rather than
		 * slurm_ctime2 — presumably to keep a trailing newline on
		 * plain stdout output; confirm before unifying. */
		printf("%s", slurm_ctime(&now_time));
	}
}
/*
 * _rollup_handler - Process rollup duties.
 *
 * Thread entry point: loops forever (until db_conn is NULL), running one
 * accounting-usage rollup per iteration under rollup_lock, accumulating
 * the per-category timing statistics into rpc_stats under rpc_mutex, then
 * sleeping until the top of the next hour.
 *
 * IN db_conn - database connection handle; a NULL handle ends the loop
 * RET always NULL (pthread convention)
 */
static void *_rollup_handler(void *db_conn)
{
	time_t start_time = time(NULL);
	time_t next_time;
/*	int sigarray[] = {SIGUSR1, 0}; */
	struct tm tm;
	rollup_stats_t rollup_stats;
	int i;

	/* Allow this thread to be cancelled at any point. */
	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	if (!slurm_localtime_r(&start_time, &tm)) {
		fatal("Couldn't get localtime for rollup handler %ld",
		      (long)start_time);
		return NULL;
	}

	while (1) {
		if (!db_conn)
			break;
		/* run the roll up */
		memset(&rollup_stats, 0, sizeof(rollup_stats_t));
		/* rollup_lock also guards the running_rollup flag seen by
		 * other threads while acct_storage_g_roll_usage() runs */
		slurm_mutex_lock(&rollup_lock);
		running_rollup = 1;
		debug2("running rollup at %s", slurm_ctime2(&start_time));
		acct_storage_g_roll_usage(db_conn, 0, 0, 1, &rollup_stats);
		acct_storage_g_commit(db_conn, 1);
		running_rollup = 0;
		slurm_mutex_unlock(&rollup_lock);

		/* Fold this pass's per-category timings into the global
		 * RPC statistics (count, total and max duration). */
		slurm_mutex_lock(&rpc_mutex);
		for (i = 0; i < ROLLUP_COUNT; i++) {
			if (rollup_stats.rollup_time[i] == 0)
				continue;
			rpc_stats.rollup_count[i]++;
			rpc_stats.rollup_time[i] +=
				rollup_stats.rollup_time[i];
			rpc_stats.rollup_max_time[i] =
				MAX(rpc_stats.rollup_max_time[i],
				    rollup_stats.rollup_time[i]);
		}
		slurm_mutex_unlock(&rpc_mutex);

		/* get the time now we have rolled usage */
		start_time = time(NULL);

		if (!slurm_localtime_r(&start_time, &tm)) {
			fatal("Couldn't get localtime for rollup handler %ld",
			      (long)start_time);
			return NULL;
		}

		/* sleep until the next hour: zero minutes/seconds, bump the
		 * hour, and let mktime renormalize (tm_isdst = -1 makes it
		 * resolve daylight-saving itself) */
		tm.tm_sec = 0;
		tm.tm_min = 0;
		tm.tm_hour++;
		tm.tm_isdst = -1;
		next_time = slurm_mktime(&tm);

		sleep((next_time - start_time));

		start_time = next_time;

		/* Just in case some new uids were added to the
		   system pick them up here. */
		assoc_mgr_set_missing_uids();

		/* repeat ;) */
	}

	return NULL;
}
/*
 * acct_gather_profile_p_node_step_start - begin profiling for a step on
 * this node.
 *
 * Decides whether profiling is enabled, creates the per-user profile
 * directory tree, opens (truncating) the node's HDF5 file (file-scope
 * "file_id"), hands ownership of the file to the job's uid/gid, and
 * creates the node group (file-scope "gid_node") with its identifying
 * attributes.
 *
 * IN job - step record for the step being started
 * RET SLURM_SUCCESS, or SLURM_FAILURE if the file or node group cannot
 *     be created
 */
extern int acct_gather_profile_p_node_step_start(stepd_step_rec_t* job)
{
	int rc = SLURM_SUCCESS;
	char *profile_file_name;
	char *profile_str;

	xassert(_run_in_daemon());

	g_job = job;
	xassert(hdf5_conf.dir);
	if (debug_flags & DEBUG_FLAG_PROFILE) {
		profile_str = acct_gather_profile_to_string(g_job->profile);
		info("PROFILE: option --profile=%s", profile_str);
	}

	if (g_profile_running == ACCT_GATHER_PROFILE_NOT_SET)
		g_profile_running = _determine_profile();

	/* Nothing to do when profiling is disabled for this step. */
	if (g_profile_running <= ACCT_GATHER_PROFILE_NONE)
		return rc;

	_create_directories();

	/* Use a more user friendly string "batch" rather
	 * then 4294967294. */
	if (g_job->stepid == NO_VAL) {
		profile_file_name = xstrdup_printf("%s/%s/%u_%s_%s.h5",
						   hdf5_conf.dir,
						   g_job->user_name,
						   g_job->jobid,
						   "batch",
						   g_job->node_name);
	} else {
		profile_file_name = xstrdup_printf(
			"%s/%s/%u_%u_%s.h5",
			hdf5_conf.dir, g_job->user_name,
			g_job->jobid, g_job->stepid, g_job->node_name);
	}

	if (debug_flags & DEBUG_FLAG_PROFILE) {
		profile_str = acct_gather_profile_to_string(g_profile_running);
		info("PROFILE: node_step_start, opt=%s file=%s",
		     profile_str, profile_file_name);
	}

	/* Create a new file using the default properties. */
	file_id = H5Fcreate(profile_file_name, H5F_ACC_TRUNC, H5P_DEFAULT,
			    H5P_DEFAULT);
	/* The file must belong to the job's user, readable only by them. */
	if (chown(profile_file_name, (uid_t)g_job->uid,
		  (gid_t)g_job->gid) < 0)
		error("chown(%s): %m", profile_file_name);
	if (chmod(profile_file_name, 0600) < 0)	/* was unchecked */
		error("chmod(%s): %m", profile_file_name);

	if (file_id < 1) {
		/* BUG FIX: this is the HDF5 *file* creation failing, not
		 * the node group; report it accurately, and keep the name
		 * alive long enough to include it in the message. */
		info("PROFILE: Failed to create hdf5 file %s",
		     profile_file_name);
		xfree(profile_file_name);
		return SLURM_FAILURE;
	}
	xfree(profile_file_name);
	/* fd_set_close_on_exec(file_id); Not supported for HDF5 */

	/* NOTE(review): group_node is a file-scope buffer of unknown size
	 * from here; sprintf assumes node_name fits — confirm. */
	sprintf(group_node, "/%s", g_job->node_name);
	gid_node = make_group(file_id, group_node);
	if (gid_node < 0) {
		H5Fclose(file_id);
		file_id = -1;
		info("PROFILE: Failed to create Node group");
		return SLURM_FAILURE;
	}

	/* Record identifying attributes on the node group. */
	put_int_attribute(gid_node, ATTR_NODEINX, g_job->nodeid);
	put_string_attribute(gid_node, ATTR_NODENAME, g_job->node_name);
	put_int_attribute(gid_node, ATTR_NTASKS, g_job->node_tasks);
	put_int_attribute(gid_node, ATTR_CPUPERTASK, g_job->cpus_per_task);

	step_start_time = time(NULL);
	put_string_attribute(gid_node, ATTR_STARTTIME,
			     slurm_ctime2(&step_start_time));

	return rc;
}
/*
 * as_mysql_job_start - record a job's start (or submit/resize) in the
 * cluster's job table.
 *
 * Chooses the relevant times (resize overrides submit/begin/start), ends
 * the previous record when a job is resizing, rewinds the rollup
 * watermark (global_last_rollup, under rollup_lock) when this job's
 * latest time predates it, then either INSERTs a new row (with
 * "on duplicate key update" fallback, retrying once after a reconnect)
 * or UPDATEs the existing row identified by job_ptr->db_index.
 *
 * IN mysql_conn - database connection
 * IN job_ptr - job record being started
 * RET SLURM_SUCCESS, SLURM_ERROR, or ESLURM_DB_CONNECTION
 */
extern int as_mysql_job_start(mysql_conn_t *mysql_conn,
			      struct job_record *job_ptr)
{
	int rc=SLURM_SUCCESS;
	char *nodes = NULL, *jname = NULL, *node_inx = NULL;
	int track_steps = 0;
	char *block_id = NULL, *partition = NULL, *gres_req = NULL,
		*gres_alloc = NULL;
	char *query = NULL;
	int reinit = 0;
	time_t begin_time, check_time, start_time, submit_time;
	uint32_t wckeyid = 0;
	uint32_t job_state;
	int node_cnt = 0;
	/* Only meaningful for array jobs; NO_VAL otherwise. */
	uint32_t array_task_id =
		(job_ptr->array_job_id) ? job_ptr->array_task_id : NO_VAL;
	uint32_t job_db_inx = job_ptr->db_index;
	job_array_struct_t *array_recs = job_ptr->array_recs;

	/* A job with neither a submit time nor a resize time cannot be
	 * recorded meaningfully. */
	if ((!job_ptr->details || !job_ptr->details->submit_time)
	    && !job_ptr->resize_time) {
		error("as_mysql_job_start: "
		      "Not inputing this job, it has no submit time.");
		return SLURM_ERROR;
	}

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;

	debug2("as_mysql_slurmdb_job_start() called");

	job_state = job_ptr->job_state;

	/* A resize restarts the record: all three times collapse to the
	 * resize time. */
	if (job_ptr->resize_time) {
		begin_time = job_ptr->resize_time;
		submit_time = job_ptr->resize_time;
		start_time = job_ptr->resize_time;
	} else {
		begin_time = job_ptr->details->begin_time;
		submit_time = job_ptr->details->submit_time;
		start_time = job_ptr->start_time;
	}

	/* If the reason is WAIT_ARRAY_TASK_LIMIT we don't want to
	 * give the pending jobs an eligible time since it will add
	 * time to accounting where as these jobs aren't able to run
	 * until later so mark it as such.
	 */
	if (job_ptr->state_reason == WAIT_ARRAY_TASK_LIMIT)
		begin_time = INFINITE;

	/* Since we need a new db_inx make sure the old db_inx
	 * removed. This is most likely the only time we are going to
	 * be notified of the change also so make the state without
	 * the resize.
	 */
	if (IS_JOB_RESIZING(job_ptr)) {
		/* If we have a db_index lets end the previous record. */
		if (!job_ptr->db_index) {
			error("We don't have a db_index for job %u, "
			      "this should only happen when resizing "
			      "jobs and the database interface was down.",
			      job_ptr->job_id);
			job_ptr->db_index = _get_db_index(mysql_conn,
							  job_ptr->details->
							  submit_time,
							  job_ptr->job_id,
							  job_ptr->assoc_id);
		}

		if (job_ptr->db_index)
			as_mysql_job_complete(mysql_conn, job_ptr);

		job_state &= (~JOB_RESIZING);
		job_ptr->db_index = 0;
	}

	job_state &= JOB_STATE_BASE;

	/* See what we are hearing about here if no start time. If
	 * this job latest time is before the last roll up we will
	 * need to reset it to look at this job.
	 */
	if (start_time)
		check_time = start_time;
	else if (begin_time)
		check_time = begin_time;
	else
		check_time = submit_time;

	slurm_mutex_lock(&rollup_lock);
	if (check_time < global_last_rollup) {
		MYSQL_RES *result = NULL;
		MYSQL_ROW row;

		/* check to see if we are hearing about this time for the
		 * first time.
		 */
		query = xstrdup_printf("select job_db_inx "
				       "from \"%s_%s\" where id_job=%u and "
				       "time_submit=%ld and time_eligible=%ld "
				       "and time_start=%ld;",
				       mysql_conn->cluster_name,
				       job_table, job_ptr->job_id,
				       submit_time, begin_time, start_time);
		if (debug_flags & DEBUG_FLAG_DB_JOB)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
			xfree(query);
			slurm_mutex_unlock(&rollup_lock);
			return SLURM_ERROR;
		}
		xfree(query);
		if ((row = mysql_fetch_row(result))) {
			/* Row already present with these times: nothing
			 * to re-roll; skip to the normal path. */
			mysql_free_result(result);
			debug4("revieved an update for a "
			       "job (%u) already known about",
			       job_ptr->job_id);
			slurm_mutex_unlock(&rollup_lock);
			goto no_rollup_change;
		}
		mysql_free_result(result);

		if (job_ptr->start_time)
			debug("Need to reroll usage from %s Job %u "
			      "from %s started then and we are just "
			      "now hearing about it.",
			      slurm_ctime2(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);
		else if (begin_time)
			debug("Need to reroll usage from %s Job %u "
			      "from %s became eligible then and we are just "
			      "now hearing about it.",
			      slurm_ctime2(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);
		else
			debug("Need to reroll usage from %s Job %u "
			      "from %s was submitted then and we are just "
			      "now hearing about it.",
			      slurm_ctime2(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);

		/* Rewind the rollup watermark so usage is recomputed from
		 * this job's earliest known time. */
		global_last_rollup = check_time;
		slurm_mutex_unlock(&rollup_lock);

		/* If the times here are later than the daily_rollup
		   or monthly rollup it isn't a big deal since they
		   are always shrunk down to the beginning of each
		   time period. */
		query = xstrdup_printf("update \"%s_%s\" set "
				       "hourly_rollup=%ld, "
				       "daily_rollup=%ld, monthly_rollup=%ld",
				       mysql_conn->cluster_name,
				       last_ran_table, check_time,
				       check_time, check_time);
		if (debug_flags & DEBUG_FLAG_DB_JOB)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
	} else
		slurm_mutex_unlock(&rollup_lock);

no_rollup_change:

	if (job_ptr->name && job_ptr->name[0])
		jname = slurm_add_slash_to_quotes(job_ptr->name);
	else {
		jname = xstrdup("allocation");
		track_steps = 1;
	}

	if (job_ptr->nodes && job_ptr->nodes[0])
		nodes = job_ptr->nodes;
	else
		nodes = "None assigned";

	if (job_ptr->batch_flag)
		track_steps = 1;

	/* In daemon (slurmdbd) mode some fields arrive repurposed:
	 * comment carries the block id and network carries node_inx. */
	if (slurmdbd_conf) {
		block_id = xstrdup(job_ptr->comment);
		node_cnt = job_ptr->total_nodes;
		node_inx = job_ptr->network;
	} else {
		char temp_bit[BUF_SIZE];

		if (job_ptr->node_bitmap) {
			node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
					   job_ptr->node_bitmap);
		}
#ifdef HAVE_BG
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_BLOCK_ID,
					    &block_id);
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
#else
		node_cnt = job_ptr->total_nodes;
#endif
	}

	/* Grab the wckey once to make sure it is placed. */
	if (job_ptr->assoc_id && (!job_ptr->db_index || job_ptr->wckey))
		wckeyid = _get_wckeyid(mysql_conn, &job_ptr->wckey,
				       job_ptr->user_id,
				       mysql_conn->cluster_name,
				       job_ptr->assoc_id);

	/* Quote-escape the strings that go into the query verbatim. */
	if (!IS_JOB_PENDING(job_ptr) && job_ptr->part_ptr)
		partition = slurm_add_slash_to_quotes(
			job_ptr->part_ptr->name);
	else if (job_ptr->partition)
		partition = slurm_add_slash_to_quotes(job_ptr->partition);

	if (job_ptr->gres_req)
		gres_req = slurm_add_slash_to_quotes(job_ptr->gres_req);

	if (job_ptr->gres_alloc)
		gres_alloc = slurm_add_slash_to_quotes(job_ptr->gres_alloc);

	if (!job_ptr->db_index) {
		/* No existing row: build an INSERT.  The optional column
		 * list below must stay in lock-step with the optional
		 * value list that follows. */
		query = xstrdup_printf(
			"insert into \"%s_%s\" "
			"(id_job, mod_time, id_array_job, id_array_task, "
			"id_assoc, id_qos, id_user, "
			"id_group, nodelist, id_resv, timelimit, "
			"time_eligible, time_submit, time_start, "
			"job_name, track_steps, state, priority, cpus_req, "
			"nodes_alloc, mem_req",
			mysql_conn->cluster_name, job_table);

		if (wckeyid)
			xstrcat(query, ", id_wckey");
		if (job_ptr->account)
			xstrcat(query, ", account");
		if (partition)
			xstrcat(query, ", `partition`");
		if (block_id)
			xstrcat(query, ", id_block");
		if (job_ptr->wckey)
			xstrcat(query, ", wckey");
		if (node_inx)
			xstrcat(query, ", node_inx");
		if (gres_req)
			xstrcat(query, ", gres_req");
		if (gres_alloc)
			xstrcat(query, ", gres_alloc");
		if (array_recs && array_recs->task_id_str)
			xstrcat(query, ", array_task_str, array_max_tasks, "
				"array_task_pending");
		else
			xstrcat(query, ", array_task_str, "
				"array_task_pending");

		if (job_ptr->tres_alloc_str)
			xstrcat(query, ", tres_alloc");
		if (job_ptr->tres_req_str)
			xstrcat(query, ", tres_req");

		xstrfmtcat(query,
			   ") values (%u, UNIX_TIMESTAMP(), "
			   "%u, %u, %u, %u, %u, %u, "
			   "'%s', %u, %u, %ld, %ld, %ld, "
			   "'%s', %u, %u, %u, %u, %u, %u",
			   job_ptr->job_id, job_ptr->array_job_id,
			   array_task_id, job_ptr->assoc_id,
			   job_ptr->qos_id, job_ptr->user_id,
			   job_ptr->group_id, nodes,
			   job_ptr->resv_id, job_ptr->time_limit,
			   begin_time, submit_time, start_time,
			   jname, track_steps, job_state,
			   job_ptr->priority,
			   job_ptr->details->min_cpus,
			   node_cnt,
			   job_ptr->details->pn_min_memory);

		/* Optional values, same order as the optional columns. */
		if (wckeyid)
			xstrfmtcat(query, ", %u", wckeyid);
		if (job_ptr->account)
			xstrfmtcat(query, ", '%s'", job_ptr->account);
		if (partition)
			xstrfmtcat(query, ", '%s'", partition);
		if (block_id)
			xstrfmtcat(query, ", '%s'", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, ", '%s'", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, ", '%s'", node_inx);
		if (gres_req)
			xstrfmtcat(query, ", '%s'", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, ", '%s'", gres_alloc);
		if (array_recs && array_recs->task_id_str)
			xstrfmtcat(query, ", '%s', %u, %u",
				   array_recs->task_id_str,
				   array_recs->max_run_tasks,
				   array_recs->task_cnt);
		else
			xstrcat(query, ", NULL, 0");

		if (job_ptr->tres_alloc_str)
			xstrfmtcat(query, ", '%s'", job_ptr->tres_alloc_str);
		if (job_ptr->tres_req_str)
			xstrfmtcat(query, ", '%s'", job_ptr->tres_req_str);

		/* Upsert clause: a duplicate key means this job row was
		 * already inserted, so refresh it instead. */
		xstrfmtcat(query,
			   ") on duplicate key update "
			   "job_db_inx=LAST_INSERT_ID(job_db_inx), "
			   "id_user=%u, id_group=%u, "
			   "nodelist='%s', id_resv=%u, timelimit=%u, "
			   "time_submit=%ld, time_eligible=%ld, "
			   "time_start=%ld, mod_time=UNIX_TIMESTAMP(), "
			   "job_name='%s', track_steps=%u, id_qos=%u, "
			   "state=greatest(state, %u), priority=%u, "
			   "cpus_req=%u, nodes_alloc=%u, "
			   "mem_req=%u, id_array_job=%u, id_array_task=%u",
			   job_ptr->user_id, job_ptr->group_id, nodes,
			   job_ptr->resv_id, job_ptr->time_limit,
			   submit_time, begin_time, start_time,
			   jname, track_steps, job_ptr->qos_id,
			   job_state, job_ptr->priority,
			   job_ptr->details->min_cpus, node_cnt,
			   job_ptr->details->pn_min_memory,
			   job_ptr->array_job_id, array_task_id);

		if (wckeyid)
			xstrfmtcat(query, ", id_wckey=%u", wckeyid);
		if (job_ptr->account)
			xstrfmtcat(query, ", account='%s'",
				   job_ptr->account);
		if (partition)
			xstrfmtcat(query, ", `partition`='%s'", partition);
		if (block_id)
			xstrfmtcat(query, ", id_block='%s'", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, ", wckey='%s'", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, ", node_inx='%s'", node_inx);
		if (gres_req)
			xstrfmtcat(query, ", gres_req='%s'", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, ", gres_alloc='%s'", gres_alloc);
		if (array_recs && array_recs->task_id_str)
			xstrfmtcat(query, ", array_task_str='%s', "
				   "array_max_tasks=%u, "
				   "array_task_pending=%u",
				   array_recs->task_id_str,
				   array_recs->max_run_tasks,
				   array_recs->task_cnt);
		else
			xstrfmtcat(query, ", array_task_str=NULL, "
				   "array_task_pending=0");

		if (job_ptr->tres_alloc_str)
			xstrfmtcat(query, ", tres_alloc='%s'",
				   job_ptr->tres_alloc_str);
		if (job_ptr->tres_req_str)
			xstrfmtcat(query, ", tres_req='%s'",
				   job_ptr->tres_req_str);

		if (debug_flags & DEBUG_FLAG_DB_JOB)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	try_again:
		/* One reconnect-and-retry if the insert fails. */
		if (!(job_ptr->db_index = mysql_db_insert_ret_id(
			      mysql_conn, query))) {
			if (!reinit) {
				error("It looks like the storage has gone "
				      "away trying to reconnect");
				/* reconnect */
				check_connection(mysql_conn);
				reinit = 1;
				goto try_again;
			} else
				rc = SLURM_ERROR;
		}
	} else {
		/* Existing row: build an UPDATE keyed on job_db_inx. */
		query = xstrdup_printf("update \"%s_%s\" set nodelist='%s', ",
				       mysql_conn->cluster_name,
				       job_table, nodes);

		if (wckeyid)
			xstrfmtcat(query, "id_wckey=%u, ", wckeyid);
		if (job_ptr->account)
			xstrfmtcat(query, "account='%s', ",
				   job_ptr->account);
		if (partition)
			xstrfmtcat(query, "`partition`='%s', ", partition);
		if (block_id)
			xstrfmtcat(query, "id_block='%s', ", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, "wckey='%s', ", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, "node_inx='%s', ", node_inx);
		if (gres_req)
			xstrfmtcat(query, "gres_req='%s', ", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, "gres_alloc='%s', ", gres_alloc);
		if (array_recs && array_recs->task_id_str)
			xstrfmtcat(query, "array_task_str='%s', "
				   "array_max_tasks=%u, "
				   "array_task_pending=%u, ",
				   array_recs->task_id_str,
				   array_recs->max_run_tasks,
				   array_recs->task_cnt);
		else
			xstrfmtcat(query, "array_task_str=NULL, "
				   "array_task_pending=0, ");

		if (job_ptr->tres_alloc_str)
			xstrfmtcat(query, "tres_alloc='%s', ",
				   job_ptr->tres_alloc_str);
		if (job_ptr->tres_req_str)
			xstrfmtcat(query, "tres_req='%s', ",
				   job_ptr->tres_req_str);

		xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, "
			   "nodes_alloc=%u, id_qos=%u, "
			   "id_assoc=%u, id_resv=%u, "
			   "timelimit=%u, mem_req=%u, "
			   "id_array_job=%u, id_array_task=%u, "
			   "time_eligible=%ld, mod_time=UNIX_TIMESTAMP() "
			   "where job_db_inx=%d",
			   start_time, jname, job_state,
			   node_cnt, job_ptr->qos_id,
			   job_ptr->assoc_id, job_ptr->resv_id,
			   job_ptr->time_limit,
			   job_ptr->details->pn_min_memory,
			   job_ptr->array_job_id, array_task_id,
			   begin_time, job_ptr->db_index);

		if (debug_flags & DEBUG_FLAG_DB_JOB)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		rc = mysql_db_query(mysql_conn, query);
	}

	/* Release everything slurm_add_slash_to_quotes()/xstrdup()
	 * allocated above (xfree of NULL is a no-op). */
	xfree(block_id);
	xfree(partition);
	xfree(gres_req);
	xfree(gres_alloc);
	xfree(jname);
	xfree(query);

	/* now we will reset all the steps */
	if (IS_JOB_RESIZING(job_ptr)) {
		/* FIXME : Verify this is still needed */
		if (IS_JOB_SUSPENDED(job_ptr))
			as_mysql_suspend(mysql_conn, job_db_inx, job_ptr);
	}

	return rc;
}