extern int slurm_jobcomp_log_record ( struct job_record *job_ptr ) { int rc = SLURM_SUCCESS; char job_rec[1024]; char usr_str[32], grp_str[32], start_str[32], end_str[32], lim_str[32]; char select_buf[128], *state_string, *work_dir; size_t offset = 0, tot_size, wrote; enum job_states job_state; uint32_t time_limit; if ((log_name == NULL) || (job_comp_fd < 0)) { error("JobCompLoc log file %s not open", log_name); return SLURM_ERROR; } slurm_mutex_lock( &file_lock ); _get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str)); _get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str)); if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr) time_limit = job_ptr->part_ptr->max_time; else time_limit = job_ptr->time_limit; if (time_limit == INFINITE) strcpy(lim_str, "UNLIMITED"); else { snprintf(lim_str, sizeof(lim_str), "%lu", (unsigned long) time_limit); } if (job_ptr->job_state & JOB_RESIZING) { time_t now = time(NULL); state_string = job_state_string(job_ptr->job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&now, end_str, sizeof(end_str)); } else { /* Job state will typically have JOB_COMPLETING or JOB_RESIZING * flag set when called. We remove the flags to get the eventual * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */ job_state = job_ptr->job_state & JOB_STATE_BASE; state_string = job_state_string(job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else if (job_ptr->start_time > job_ptr->end_time) { /* Job cancelled while pending and * expected start time is in the future. */ snprintf(start_str, sizeof(start_str), "Unknown"); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&job_ptr->end_time, end_str, sizeof(end_str)); } if (job_ptr->details && job_ptr->details->work_dir) work_dir = job_ptr->details->work_dir; else work_dir = "unknown"; select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_MIXED); snprintf(job_rec, sizeof(job_rec), JOB_FORMAT, (unsigned long) job_ptr->job_id, usr_str, (unsigned long) job_ptr->user_id, grp_str, (unsigned long) job_ptr->group_id, job_ptr->name, state_string, job_ptr->partition, lim_str, start_str, end_str, job_ptr->nodes, job_ptr->node_cnt, job_ptr->total_cpus, work_dir, select_buf); tot_size = strlen(job_rec); while ( offset < tot_size ) { wrote = write(job_comp_fd, job_rec + offset, tot_size - offset); if (wrote == -1) { if (errno == EAGAIN) continue; else { plugin_errno = errno; rc = SLURM_ERROR; break; } } offset += wrote; } slurm_mutex_unlock( &file_lock ); return rc; }
extern int slurm_jobcomp_log_record(struct job_record *job_ptr) { int nwritten, B_SIZE = 1024; char usr_str[32], grp_str[32], start_str[32], end_str[32]; char submit_str[32], *cluster = NULL, *qos, *state_string; time_t elapsed_time, submit_time, eligible_time; enum job_states job_state; uint32_t time_limit; uint16_t ntasks_per_node; int i; char *buffer, tmp_str[256], *script_str, *script; struct job_node *jnode; if (list_count(jobslist) > MAX_JOBS) { error("%s: Limit of %d enqueued jobs in memory waiting to be " "indexed reached. Job %lu discarded", plugin_type, MAX_JOBS, (unsigned long)job_ptr->job_id); return SLURM_ERROR; } _get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str)); _get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str)); if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr) time_limit = job_ptr->part_ptr->max_time; else time_limit = job_ptr->time_limit; if (job_ptr->job_state & JOB_RESIZING) { time_t now = time(NULL); state_string = job_state_string(job_ptr->job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&now, end_str, sizeof(end_str)); } else { /* Job state will typically have JOB_COMPLETING or JOB_RESIZING * flag set when called. We remove the flags to get the eventual * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */ job_state = job_ptr->job_state & JOB_STATE_BASE; state_string = job_state_string(job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else if (job_ptr->start_time > job_ptr->end_time) { /* Job cancelled while pending and * expected start time is in the future. */ snprintf(start_str, sizeof(start_str), "Unknown"); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&job_ptr->end_time, end_str, sizeof(end_str)); } elapsed_time = job_ptr->end_time - job_ptr->start_time; buffer = xmalloc(B_SIZE); nwritten = snprintf(buffer, B_SIZE, JOBCOMP_DATA_FORMAT, (unsigned long) job_ptr->job_id, usr_str, (unsigned long) job_ptr->user_id, grp_str, (unsigned long) job_ptr->group_id, start_str, end_str, (long) elapsed_time, job_ptr->partition, job_ptr->alloc_node, job_ptr->nodes, (unsigned long) job_ptr->total_cpus, (unsigned long) job_ptr->total_nodes, (unsigned long) job_ptr->derived_ec, (unsigned long) job_ptr->exit_code, state_string); if (nwritten >= B_SIZE) { B_SIZE += nwritten + 1; buffer = xrealloc(buffer, B_SIZE); nwritten = snprintf(buffer, B_SIZE, JOBCOMP_DATA_FORMAT, (unsigned long) job_ptr->job_id, usr_str, (unsigned long) job_ptr->user_id, grp_str, (unsigned long) job_ptr->group_id, start_str, end_str, (long) elapsed_time, job_ptr->partition, job_ptr->alloc_node, job_ptr->nodes, (unsigned long) job_ptr->total_cpus, (unsigned long) job_ptr->total_nodes, (unsigned long) job_ptr->derived_ec, (unsigned long) job_ptr->exit_code, state_string); if (nwritten >= B_SIZE) { error("%s: Job completion data truncated and lost", plugin_type); return SLURM_ERROR; } } snprintf(tmp_str, sizeof(tmp_str), ",\"cpu_hours\":%.6f", ((float) elapsed_time * (float) job_ptr->total_cpus) / (float) 3600); xstrcat(buffer, tmp_str); if (job_ptr->array_task_id != NO_VAL) { xstrfmtcat(buffer, ",\"array_job_id\":%lu", (unsigned long) job_ptr->array_job_id); xstrfmtcat(buffer, ",\"array_task_id\":%lu", (unsigned long) job_ptr->array_task_id); } if (job_ptr->details && (job_ptr->details->submit_time != NO_VAL)) { submit_time = job_ptr->details->submit_time; _make_time_str(&submit_time, submit_str, sizeof(submit_str)); xstrfmtcat(buffer, ",\"@submit\":\"%s\"", submit_str); } if (job_ptr->details && (job_ptr->details->begin_time != NO_VAL)) { eligible_time = job_ptr->start_time - job_ptr->details->begin_time; xstrfmtcat(buffer, ",\"eligible_time\":%lu", eligible_time); } if (job_ptr->details && (job_ptr->details->work_dir && job_ptr->details->work_dir[0])) { xstrfmtcat(buffer, ",\"work_dir\":\"%s\"", job_ptr->details->work_dir); } if (job_ptr->details && (job_ptr->details->std_err && job_ptr->details->std_err[0])) { xstrfmtcat(buffer, ",\"std_err\":\"%s\"", job_ptr->details->std_err); } if (job_ptr->details && (job_ptr->details->std_in && job_ptr->details->std_in[0])) { xstrfmtcat(buffer, ",\"std_in\":\"%s\"", job_ptr->details->std_in); } if (job_ptr->details && (job_ptr->details->std_out && job_ptr->details->std_out[0])) { xstrfmtcat(buffer, ",\"std_out\":\"%s\"", job_ptr->details->std_out); } if (job_ptr->assoc_ptr != NULL) { cluster = ((slurmdb_assoc_rec_t *) job_ptr->assoc_ptr)->cluster; xstrfmtcat(buffer, ",\"cluster\":\"%s\"", cluster); } if (job_ptr->qos_ptr != NULL) { slurmdb_qos_rec_t *assoc = (slurmdb_qos_rec_t *) job_ptr->qos_ptr; qos = assoc->name; xstrfmtcat(buffer, ",\"qos\":\"%s\"", qos); } if (job_ptr->details && (job_ptr->details->num_tasks != NO_VAL)) { xstrfmtcat(buffer, ",\"ntasks\":%hu", job_ptr->details->num_tasks); } if (job_ptr->details && (job_ptr->details->ntasks_per_node != NO_VAL)) { ntasks_per_node = job_ptr->details->ntasks_per_node; xstrfmtcat(buffer, ",\"ntasks_per_node\":%hu", ntasks_per_node); } if (job_ptr->details && (job_ptr->details->cpus_per_task != NO_VAL)) { xstrfmtcat(buffer, ",\"cpus_per_task\":%hu", job_ptr->details->cpus_per_task); } if (job_ptr->details && (job_ptr->details->orig_dependency && job_ptr->details->orig_dependency[0])) { xstrfmtcat(buffer, ",\"orig_dependency\":\"%s\"", job_ptr->details->orig_dependency); } if (job_ptr->details && (job_ptr->details->exc_nodes && job_ptr->details->exc_nodes[0])) { xstrfmtcat(buffer, ",\"excluded_nodes\":\"%s\"", job_ptr->details->exc_nodes); } if (time_limit != INFINITE) { xstrfmtcat(buffer, ",\"time_limit\":%lu", (unsigned long) time_limit * 60); } if (job_ptr->resv_name && job_ptr->resv_name[0]) { xstrfmtcat(buffer, ",\"reservation_name\":\"%s\"", job_ptr->resv_name); } if (job_ptr->gres_req && job_ptr->gres_req[0]) { xstrfmtcat(buffer, ",\"gres_req\":\"%s\"", job_ptr->gres_req); } if (job_ptr->gres_alloc && job_ptr->gres_alloc[0]) { xstrfmtcat(buffer, ",\"gres_alloc\":\"%s\"", job_ptr->gres_alloc); } if (job_ptr->account && job_ptr->account[0]) { xstrfmtcat(buffer, ",\"account\":\"%s\"", job_ptr->account); } script = get_job_script(job_ptr); if (script && script[0]) { script_str = _json_escape(script); xstrfmtcat(buffer, ",\"script\":\"%s\"", script_str); xfree(script_str); } xfree(script); if (job_ptr->assoc_ptr) { assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; slurmdb_assoc_rec_t *assoc_ptr = job_ptr->assoc_ptr; char *parent_accounts = NULL; char **acc_aux = NULL; int nparents = 0; assoc_mgr_lock(&locks); /* Start at the first parent and go up. When studying * this code it was slightly faster to do 2 loops on * the association linked list and only 1 xmalloc but * we opted for cleaner looking code and going with a * realloc. */ while (assoc_ptr) { if (assoc_ptr->acct) { acc_aux = xrealloc(acc_aux, sizeof(char *) * (nparents + 1)); acc_aux[nparents++] = assoc_ptr->acct; } assoc_ptr = assoc_ptr->usage->parent_assoc_ptr; } for (i = nparents - 1; i >= 0; i--) xstrfmtcat(parent_accounts, "/%s", acc_aux[i]); xfree(acc_aux); xstrfmtcat(buffer, ",\"parent_accounts\":\"%s\"", parent_accounts); xfree(parent_accounts); assoc_mgr_unlock(&locks); } xstrcat(buffer, "}"); jnode = xmalloc(sizeof(struct job_node)); jnode->serialized_job = xstrdup(buffer); list_enqueue(jobslist, jnode); return SLURM_SUCCESS; }
extern int slurm_jobcomp_log_record(struct job_record *job_ptr) { int rc = SLURM_SUCCESS; char *usr_str = NULL, *grp_str = NULL, lim_str[32]; char *connect_type = NULL, *reboot = NULL, *rotate = NULL, *geometry = NULL, *start = NULL, *blockid = NULL; enum job_states job_state; char *query = NULL; uint32_t time_limit, start_time, end_time; if(!jobcomp_mysql_conn || mysql_db_ping(jobcomp_mysql_conn) != 0) { char *loc = slurm_get_jobcomp_loc(); if(slurm_jobcomp_set_location(loc) == SLURM_ERROR) { xfree(loc); return SLURM_ERROR; } xfree(loc); } usr_str = _get_user_name(job_ptr->user_id); grp_str = _get_group_name(job_ptr->group_id); if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr) time_limit = job_ptr->part_ptr->max_time; else time_limit = job_ptr->time_limit; if (time_limit == INFINITE) strcpy(lim_str, "UNLIMITED"); else { snprintf(lim_str, sizeof(lim_str), "%lu", (unsigned long) time_limit); } /* Job will typically be COMPLETING when this is called. * We remove the flags to get the eventual completion state: * JOB_FAILED, JOB_TIMEOUT, etc. */ if (IS_JOB_RESIZING(job_ptr)) { job_state = JOB_RESIZING; if (job_ptr->resize_time) start_time = job_ptr->resize_time; else start_time = job_ptr->start_time; end_time = time(NULL); } else { job_state = job_ptr->job_state & JOB_STATE_BASE; if (job_ptr->resize_time) start_time = job_ptr->resize_time; else if (job_ptr->start_time > job_ptr->end_time) { /* Job cancelled while pending and * expected start time is in the future. */ start_time = 0; } else start_time = job_ptr->start_time; end_time = job_ptr->end_time; } connect_type = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo, SELECT_PRINT_CONNECTION); reboot = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo, SELECT_PRINT_REBOOT); rotate = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo, SELECT_PRINT_ROTATE); geometry = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo, SELECT_PRINT_GEOMETRY); start = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo, SELECT_PRINT_START); #ifdef HAVE_BG blockid = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo, SELECT_PRINT_BG_ID); #else blockid = select_g_select_jobinfo_xstrdup(job_ptr->select_jobinfo, SELECT_PRINT_RESV_ID); #endif query = xstrdup_printf( "insert into %s (jobid, uid, user_name, gid, group_name, " "name, state, proc_cnt, partition, timelimit, " "starttime, endtime, nodecnt", jobcomp_table); if(job_ptr->nodes) xstrcat(query, ", nodelist"); if(connect_type) xstrcat(query, ", connect_type"); if(reboot) xstrcat(query, ", reboot"); if(rotate) xstrcat(query, ", rotate"); if(job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) xstrcat(query, ", maxprocs"); if(geometry) xstrcat(query, ", geometry"); if(start) xstrcat(query, ", start"); if(blockid) xstrcat(query, ", blockid"); xstrfmtcat(query, ") values (%u, %u, '%s', %u, '%s', \"%s\", %d, %u, " "'%s', \"%s\", %u, %u, %u", job_ptr->job_id, job_ptr->user_id, usr_str, job_ptr->group_id, grp_str, job_ptr->name, job_state, job_ptr->total_cpus, job_ptr->partition, lim_str, start_time, end_time, job_ptr->node_cnt); if(job_ptr->nodes) xstrfmtcat(query, ", '%s'", job_ptr->nodes); if(connect_type) { xstrfmtcat(query, ", '%s'", connect_type); xfree(connect_type); } if(reboot) { xstrfmtcat(query, ", '%s'", reboot); xfree(reboot); } if(rotate) { xstrfmtcat(query, ", '%s'", rotate); xfree(rotate); } if(job_ptr->details && (job_ptr->details->max_cpus != NO_VAL)) xstrfmtcat(query, ", '%u'", job_ptr->details->max_cpus); if(geometry) { xstrfmtcat(query, ", '%s'", geometry); xfree(geometry); } if(start) { xstrfmtcat(query, ", '%s'", start); xfree(start); } if(blockid) { xstrfmtcat(query, ", '%s'", blockid); xfree(blockid); } xstrcat(query, ")"); //info("query = %s", query); rc = mysql_db_query(jobcomp_mysql_conn, query); xfree(usr_str); xfree(grp_str); return rc; }
extern int slurm_jobcomp_log_record ( struct job_record *job_ptr ) { int rc = SLURM_SUCCESS; char job_rec[1024]; char usr_str[32], grp_str[32], start_str[32], end_str[32], lim_str[32]; char *resv_name, *gres, *account, *qos, *wckey, *cluster; char submit_time[32], eligible_time[32], array_id[64], pack_id[64]; char select_buf[128], *state_string, *work_dir; size_t offset = 0, tot_size, wrote; uint32_t job_state; uint32_t time_limit; if ((log_name == NULL) || (job_comp_fd < 0)) { error("JobCompLoc log file %s not open", log_name); return SLURM_ERROR; } slurm_mutex_lock( &file_lock ); _get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str)); _get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str)); if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr) time_limit = job_ptr->part_ptr->max_time; else time_limit = job_ptr->time_limit; if (time_limit == INFINITE) strcpy(lim_str, "UNLIMITED"); else { snprintf(lim_str, sizeof(lim_str), "%lu", (unsigned long) time_limit); } if (job_ptr->job_state & JOB_RESIZING) { time_t now = time(NULL); state_string = job_state_string(job_ptr->job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&now, end_str, sizeof(end_str)); } else { /* Job state will typically have JOB_COMPLETING or JOB_RESIZING * flag set when called. We remove the flags to get the eventual * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */ job_state = job_ptr->job_state & JOB_STATE_BASE; state_string = job_state_string(job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else if (job_ptr->start_time > job_ptr->end_time) { /* Job cancelled while pending and * expected start time is in the future. */ snprintf(start_str, sizeof(start_str), "Unknown"); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&job_ptr->end_time, end_str, sizeof(end_str)); } if (job_ptr->details && job_ptr->details->work_dir) work_dir = job_ptr->details->work_dir; else work_dir = "unknown"; if (job_ptr->resv_name && job_ptr->resv_name[0]) resv_name = job_ptr->resv_name; else resv_name = ""; if (job_ptr->gres_req && job_ptr->gres_req[0]) gres = job_ptr->gres_req; else gres = ""; if (job_ptr->account && job_ptr->account[0]) account = job_ptr->account; else account = ""; if (job_ptr->qos_ptr != NULL) { qos = job_ptr->qos_ptr->name; } else qos = ""; if (job_ptr->wckey && job_ptr->wckey[0]) wckey = job_ptr->wckey; else wckey = ""; if (job_ptr->assoc_ptr != NULL) cluster = job_ptr->assoc_ptr->cluster; else cluster = "unknown"; if (job_ptr->details && job_ptr->details->submit_time) { _make_time_str(&job_ptr->details->submit_time, submit_time, sizeof(submit_time)); } else { snprintf(submit_time, sizeof(submit_time), "unknown"); } if (job_ptr->details && job_ptr->details->begin_time) { _make_time_str(&job_ptr->details->begin_time, eligible_time, sizeof(eligible_time)); } else { snprintf(eligible_time, sizeof(eligible_time), "unknown"); } if (job_ptr->array_task_id != NO_VAL) { snprintf(array_id, sizeof(array_id), " ArrayJobId=%u ArrayTaskId=%u", job_ptr->array_job_id, job_ptr->array_task_id); } else { array_id[0] = '\0'; } if (job_ptr->pack_job_id) { snprintf(pack_id, sizeof(pack_id), " PackJobId=%u PackJobOffset=%u", job_ptr->pack_job_id, job_ptr->pack_job_offset); } else { pack_id[0] = '\0'; } select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_MIXED); snprintf(job_rec, sizeof(job_rec), JOB_FORMAT, (unsigned long) job_ptr->job_id, usr_str, (unsigned long) job_ptr->user_id, grp_str, (unsigned long) job_ptr->group_id, job_ptr->name, state_string, job_ptr->partition, lim_str, start_str, end_str, job_ptr->nodes, job_ptr->node_cnt, job_ptr->total_cpus, work_dir, resv_name, gres, account, qos, wckey, cluster, submit_time, eligible_time, array_id, pack_id, job_ptr->derived_ec, job_ptr->exit_code, select_buf); tot_size = strlen(job_rec); while (offset < tot_size) { wrote = write(job_comp_fd, job_rec + offset, tot_size - offset); if (wrote == -1) { if (errno == EAGAIN) continue; else { plugin_errno = errno; rc = SLURM_ERROR; break; } } offset += wrote; } slurm_mutex_unlock( &file_lock ); return rc; }