/*
 * destroy_mysql_conn - release a connection object and everything it owns.
 * IN mysql_conn: connection to tear down; a NULL pointer is accepted and
 *	nothing is done for it.
 * RET: SLURM_SUCCESS in every case.
 */
extern int destroy_mysql_conn(mysql_conn_t *mysql_conn)
{
	/* Nothing to release. */
	if (!mysql_conn)
		return SLURM_SUCCESS;

	/*
	 * Close the connection first, then free the members, and only then
	 * the structure that holds them.
	 */
	mysql_db_close_db_connection(mysql_conn);
	xfree(mysql_conn->pre_commit_query);
	xfree(mysql_conn->cluster_name);
	slurm_mutex_destroy(&mysql_conn->lock);
	list_destroy(mysql_conn->update_list);
	xfree(mysql_conn);

	return SLURM_SUCCESS;
}
/*
 * _cluster_rollup_usage - roll up usage for a single cluster.
 *
 * Determines when the hourly, daily and monthly rollups last ran (taken from
 * local_rollup->sent_start, or read from the cluster's last_ran table when
 * sent_start is 0), truncates those times and the end time to hour, day and
 * month boundaries, runs each rollup whose period is non-empty, optionally
 * records the new end times in the last_ran table, and finally commits on
 * success or rolls back on failure.
 *
 * IN arg: a local_rollup_t *. It is xfree()d here before returning.
 * RET: always NULL. The outcome is reported through *local_rollup->rc (only
 *	overwritten while it still holds SLURM_SUCCESS). Completion is
 *	announced by incrementing *local_rollup->rolledup and signalling
 *	local_rollup->rolledup_cond while holding local_rollup->rolledup_lock.
 *
 * NOTE(review): the void *(void *) signature and the comment about threads
 * below suggest this is used as a pthread start routine - confirm against
 * the caller.
 */
static void *_cluster_rollup_usage(void *arg)
{
	local_rollup_t *local_rollup = (local_rollup_t *)arg;
	int rc = SLURM_SUCCESS;
	char timer_str[128];
	mysql_conn_t mysql_conn;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	char *query = NULL;
	struct tm start_tm;
	struct tm end_tm;
	/* End of the period to roll up; 0 is replaced by "now" further down. */
	time_t my_time = local_rollup->sent_end;
	/* All three start points begin at sent_start (0 == look them up). */
	time_t last_hour = local_rollup->sent_start;
	time_t last_day = local_rollup->sent_start;
	time_t last_month = local_rollup->sent_start;
	time_t hour_start;
	time_t hour_end;
	time_t day_start;
	time_t day_end;
	time_t month_start;
	time_t month_end;
	DEF_TIMERS;

	/* Columns selected from the last_ran table, in this order. */
	char *update_req_inx[] = {
		"hourly_rollup",
		"daily_rollup",
		"monthly_rollup"
	};

	/* Row indexes matching update_req_inx[] above. */
	enum {
		UPDATE_HOUR,
		UPDATE_DAY,
		UPDATE_MONTH,
		UPDATE_COUNT
	};

	/*
	 * Build a private connection object on the stack; only the conn value
	 * is copied from the connection handed in by the caller.
	 * NOTE(review): rollback = 1 presumably makes the queries below
	 * transactional so that end_it can commit or roll back - confirm.
	 */
	memset(&mysql_conn, 0, sizeof(mysql_conn_t));
	mysql_conn.rollback = 1;
	mysql_conn.conn = local_rollup->mysql_conn->conn;
	slurm_mutex_init(&mysql_conn.lock);

	/* Each thread needs its own connection; we can't use the one
	 * sent from the parent thread. */
	rc = check_connection(&mysql_conn);

	if (rc != SLURM_SUCCESS)
		goto end_it;

	/* No explicit start time given: find out when each rollup last ran. */
	if (!local_rollup->sent_start) {
		char *tmp = NULL;
		int i=0;
		/* Join the column names with ", " for the select list. */
		xstrfmtcat(tmp, "%s", update_req_inx[i]);
		for(i=1; i<UPDATE_COUNT; i++) {
			xstrfmtcat(tmp, ", %s", update_req_inx[i]);
		}
		query = xstrdup_printf("select %s from \"%s_%s\"",
				       tmp, local_rollup->cluster_name,
				       last_ran_table);
		xfree(tmp);

		debug4("%d(%s:%d) query\n%s", mysql_conn.conn,
		       THIS_FILE, __LINE__, query);
		if (!(result = mysql_db_query_ret(&mysql_conn, query, 0))) {
			xfree(query);
			rc = SLURM_ERROR;
			goto end_it;
		}

		xfree(query);
		row = mysql_fetch_row(result);
		if (row) {
			last_hour = slurm_atoul(row[UPDATE_HOUR]);
			last_day = slurm_atoul(row[UPDATE_DAY]);
			last_month = slurm_atoul(row[UPDATE_MONTH]);
			mysql_free_result(result);
		} else {
			/*
			 * The last_ran table has no row yet. Seed it with the
			 * earliest time_start of an event whose node_name is
			 * empty, or with "now" when there is no such event.
			 */
			time_t now = time(NULL);
			time_t lowest = now;

			mysql_free_result(result);

			query = xstrdup_printf(
				"select time_start from \"%s_%s\" "
				"where node_name='' order by "
				"time_start asc limit 1;",
				local_rollup->cluster_name, event_table);
			debug3("%d(%s:%d) query\n%s",
			       mysql_conn.conn, THIS_FILE, __LINE__, query);
			if (!(result = mysql_db_query_ret(
				      &mysql_conn, query, 0))) {
				xfree(query);
				rc = SLURM_ERROR;
				goto end_it;
			}
			xfree(query);
			if ((row = mysql_fetch_row(result))) {
				time_t check = slurm_atoul(row[0]);
				if (check < lowest)
					lowest = check;
			}
			mysql_free_result(result);

			/* If we don't have any events like adding a
			 * cluster this will not work correctly, so we
			 * will insert now as a starting point.
			 */

			query = xstrdup_printf(
				"insert into \"%s_%s\" "
				"(hourly_rollup, daily_rollup, monthly_rollup) "
				"values (%ld, %ld, %ld);",
				local_rollup->cluster_name, last_ran_table,
				lowest, lowest, lowest);

			debug3("%d(%s:%d) query\n%s",
			       mysql_conn.conn, THIS_FILE, __LINE__, query);
			rc = mysql_db_query(&mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				rc = SLURM_ERROR;
				goto end_it;
			}

			/*
			 * Nothing earlier than "now" was found, so there is
			 * nothing to roll up. rc stays SLURM_SUCCESS so that
			 * end_it still commits the insert made just above.
			 */
			if (lowest == now) {
				debug("Cluster %s not registered, "
				      "not doing rollup",
				      local_rollup->cluster_name);
				rc = SLURM_SUCCESS;
				goto end_it;
			}

			last_hour = last_day = last_month = lowest;
		}
	}

	/* No explicit end time given: roll up to the current time. */
	if (!my_time)
		my_time = time(NULL);

	/* test month gap */
/* 	last_hour = 1212299999; */
/* 	last_day = 1212217200; */
/* 	last_month = 1212217200; */
/* 	my_time = 1212307200; */

/* 	last_hour = 1211475599; */
/* 	last_day = 1211475599; */
/* 	last_month = 1211475599; */

//	last_hour = 1211403599;
	//	last_hour = 1206946800;
//	last_day = 1207033199;
//	last_day = 1197033199;
//	last_month = 1204358399;

	if (!localtime_r(&last_hour, &start_tm)) {
		error("Couldn't get localtime from hour start %ld", last_hour);
		rc = SLURM_ERROR;
		goto end_it;
	}

	/*
	 * end_tm is filled in only once, here, and is then narrowed step by
	 * step below (hour, then day, then month), so the order of the
	 * following sections matters.
	 */
	if (!localtime_r(&my_time, &end_tm)) {
		error("Couldn't get localtime from hour end %ld", my_time);
		rc = SLURM_ERROR;
		goto end_it;
	}

	/* Below and anywhere in a rollup plugin when dealing with
	 * epoch times we need to set the tm_isdst = -1 so we don't
	 * have to worry about the time changes.  Not setting it to -1
	 * will cause problems in the day and month with the date change.
	 */

	/* Hour period: truncate both ends to the top of the hour. */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_isdst = -1;
	hour_start = mktime(&start_tm);

	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_isdst = -1;
	hour_end = mktime(&end_tm);

/* 	info("hour start %s", slurm_ctime(&hour_start)); */
/* 	info("hour end %s", slurm_ctime(&hour_end)); */
/* 	info("diff is %d", hour_end-hour_start); */

	/*
	 * Publish the end of this rollup in the shared global_last_rollup.
	 * Note this happens before any rollup below has actually run.
	 */
	slurm_mutex_lock(&rollup_lock);
	global_last_rollup = hour_end;
	slurm_mutex_unlock(&rollup_lock);

	/* set up the day period */
	if (!localtime_r(&last_day, &start_tm)) {
		error("Couldn't get localtime from day %ld", last_day);
		rc = SLURM_ERROR;
		goto end_it;
	}

	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_isdst = -1;
	day_start = mktime(&start_tm);

	/*
	 * tm_sec and tm_min of end_tm were already zeroed for the hour
	 * period, so only tm_hour needs resetting here.
	 */
	end_tm.tm_hour = 0;
	end_tm.tm_isdst = -1;
	day_end = mktime(&end_tm);

/* 	info("day start %s", slurm_ctime(&day_start)); */
/* 	info("day end %s", slurm_ctime(&day_end)); */
/* 	info("diff is %d", day_end-day_start); */

	/* set up the month period */
	if (!localtime_r(&last_month, &start_tm)) {
		error("Couldn't get localtime from month %ld", last_month);
		rc = SLURM_ERROR;
		goto end_it;
	}

	/* Month period: truncate both ends to midnight on the 1st. */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_mday = 1;
	start_tm.tm_isdst = -1;
	month_start = mktime(&start_tm);

	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_hour = 0;
	end_tm.tm_mday = 1;
	end_tm.tm_isdst = -1;
	month_end = mktime(&end_tm);

/* 	info("month start %s", slurm_ctime(&month_start)); */
/* 	info("month end %s", slurm_ctime(&month_end)); */
/* 	info("diff is %d", month_end-month_start); */

	/*
	 * Run each rollup only when its period is non-empty. Any failure
	 * jumps to end_it, which rolls back.
	 * NOTE(review): the 5000000 passed to END_TIMER3 is presumably a
	 * "took too long" threshold in microseconds - confirm against the
	 * macro definition.
	 */
	if ((hour_end - hour_start) > 0) {
		START_TIMER;
		rc = as_mysql_hourly_rollup(&mysql_conn,
					    local_rollup->cluster_name,
					    hour_start,
					    hour_end,
					    local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "hourly_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	if ((day_end - day_start) > 0) {
		START_TIMER;
		rc = as_mysql_daily_rollup(&mysql_conn,
					   local_rollup->cluster_name,
					   day_start,
					   day_end,
					   local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "daily_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	if ((month_end - month_start) > 0) {
		START_TIMER;
		rc = as_mysql_monthly_rollup(&mysql_conn,
					     local_rollup->cluster_name,
					     month_start,
					     month_end,
					     local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "monthly_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	/*
	 * Assemble a single "update ... set a=..., b=..." statement covering
	 * every period that was rolled up. When sent_end was supplied no
	 * statement is built at all, so the last_ran table is left untouched.
	 */
	if ((hour_end - hour_start) > 0) {
		/* If we have a sent_end do not update the last_run_table */
		if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set hourly_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, hour_end);
	} else
		debug2("No need to roll cluster %s this hour %ld <= %ld",
		       local_rollup->cluster_name, hour_end, hour_start);

	if ((day_end - day_start) > 0) {
		/* Append to an existing statement, otherwise start one. */
		if (query && !local_rollup->sent_end)
			xstrfmtcat(query, ", daily_rollup=%ld", day_end);
		else if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set daily_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, day_end);
	} else
		debug2("No need to roll cluster %s this day %ld <= %ld",
		       local_rollup->cluster_name, day_end, day_start);

	if ((month_end - month_start) > 0) {
		if (query && !local_rollup->sent_end)
			xstrfmtcat(query, ", monthly_rollup=%ld", month_end);
		else if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set monthly_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, month_end);
	} else
		debug2("No need to roll cluster %s this month %ld <= %ld",
		       local_rollup->cluster_name, month_end, month_start);

	if (query) {
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn.conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(&mysql_conn, query);
		xfree(query);
	}
end_it:
	/* Commit everything on success, otherwise undo it. */
	if (rc == SLURM_SUCCESS) {
		if (mysql_db_commit(&mysql_conn)) {
			error("Couldn't commit rollup of cluster %s",
			      local_rollup->cluster_name);
			rc = SLURM_ERROR;
		}
	} else {
		error("Cluster %s rollup failed", local_rollup->cluster_name);
		if (mysql_db_rollback(&mysql_conn))
			error("rollback failed");
	}

	mysql_db_close_db_connection(&mysql_conn);
	slurm_mutex_destroy(&mysql_conn.lock);

	/*
	 * Report back: bump the finished counter, keep the first failure
	 * seen in the shared rc, and signal the condition variable.
	 */
	slurm_mutex_lock(local_rollup->rolledup_lock);
	(*local_rollup->rolledup)++;
	if ((rc != SLURM_SUCCESS) && ((*local_rollup->rc) == SLURM_SUCCESS))
		(*local_rollup->rc) = rc;
	pthread_cond_signal(local_rollup->rolledup_cond);
	slurm_mutex_unlock(local_rollup->rolledup_lock);
	/* The argument structure is owned and released by this function. */
	xfree(local_rollup);

	return NULL;
}
/*
 * as_mysql_job_start - record the start of a job, or a later change to it.
 *
 * When the job has no db_index yet a row is inserted into the cluster's job
 * table (with "on duplicate key update") and the returned id is stored in
 * job_ptr->db_index; otherwise the existing row is updated. If the job's
 * start/eligible/submit time lies before global_last_rollup and no row with
 * exactly these times exists yet, global_last_rollup and the last_ran table
 * are moved back to that time so the usage gets rolled up again.
 *
 * IN mysql_conn: database connection to use.
 * IN/OUT job_ptr: job to record. db_index is reset for resizing jobs and set
 *	from the insert for new records.
 * RET: SLURM_SUCCESS on success, ESLURM_DB_CONNECTION when
 *	check_connection() fails, SLURM_ERROR when the job lacks the required
 *	details/submit time or a query fails, or whatever mysql_db_query()
 *	returned for the last statement run.
 *
 * NOTE(review): account, wckey, block_id, node_inx and the node list are
 * placed into the SQL text without slurm_add_slash_to_quotes(); presumably
 * they are validated before reaching this function - confirm.
 */
extern int as_mysql_job_start(mysql_conn_t *mysql_conn,
			      struct job_record *job_ptr)
{
	int rc=SLURM_SUCCESS;
	char *nodes = NULL, *jname = NULL, *node_inx = NULL;
	int track_steps = 0;
	char *block_id = NULL, *partition = NULL,
		*gres_req = NULL, *gres_alloc = NULL;
	char *query = NULL;
	int reinit = 0;
	time_t begin_time, check_time, start_time, submit_time;
	uint32_t wckeyid = 0;
	int job_state, node_cnt = 0;
	/* Remember the index the job came in with; it may be reset below. */
	uint32_t job_db_inx = job_ptr->db_index;
	/*
	 * Backing storage for node_inx when it is produced by bit_fmt().
	 * This must live at function scope: node_inx is read while the
	 * queries are built, long after the block that fills it has ended.
	 */
	char temp_bit[BUF_SIZE];

	if ((!job_ptr->details || !job_ptr->details->submit_time)
	    && !job_ptr->resize_time) {
		error("as_mysql_job_start: "
		      "Not inputing this job, it has no submit time.");
		return SLURM_ERROR;
	}

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;

	/*
	 * The check above lets a job without details through when it has a
	 * resize_time, but both the insert and the update below read
	 * job_ptr->details, so refuse such a job instead of dereferencing
	 * NULL.
	 */
	if (!job_ptr->details) {
		error("as_mysql_job_start: "
		      "Not inputing this job, it has no details.");
		return SLURM_ERROR;
	}

	debug2("as_mysql_slurmdb_job_start() called");

	job_state = job_ptr->job_state;

	/* A resized job uses the resize time for all three timestamps. */
	if (job_ptr->resize_time) {
		begin_time  = job_ptr->resize_time;
		submit_time = job_ptr->resize_time;
		start_time  = job_ptr->resize_time;
	} else {
		begin_time  = job_ptr->details->begin_time;
		submit_time = job_ptr->details->submit_time;
		start_time  = job_ptr->start_time;
	}

	/* Since we need a new db_inx make sure the old db_inx
	 * removed. This is most likely the only time we are going to
	 * be notified of the change also so make the state without
	 * the resize. */
	if (IS_JOB_RESIZING(job_ptr)) {
		/* If we have a db_index lets end the previous record. */
		if (!job_ptr->db_index) {
			error("We don't have a db_index for job %u, "
			      "this should only happen when resizing "
			      "jobs and the database interface was down.",
			      job_ptr->job_id);
			job_ptr->db_index = _get_db_index(mysql_conn,
							  job_ptr->details->
							  submit_time,
							  job_ptr->job_id,
							  job_ptr->assoc_id);
		}

		if (job_ptr->db_index)
			as_mysql_job_complete(mysql_conn, job_ptr);

		job_state &= (~JOB_RESIZING);
		/* Force the insert path below so a new record is created. */
		job_ptr->db_index = 0;
	}

	job_state &= JOB_STATE_BASE;

	/* See what we are hearing about here if no start time. If
	 * this job latest time is before the last roll up we will
	 * need to reset it to look at this job. */
	if (start_time)
		check_time = start_time;
	else if (begin_time)
		check_time = begin_time;
	else
		check_time = submit_time;

	/* rollup_lock is released on every path out of this if/else. */
	slurm_mutex_lock(&rollup_lock);
	if (check_time < global_last_rollup) {
		MYSQL_RES *result = NULL;
		MYSQL_ROW row;

		/* check to see if we are hearing about this time for the
		 * first time.
		 */
		query = xstrdup_printf("select job_db_inx "
				       "from \"%s_%s\" where id_job=%u and "
				       "time_submit=%ld and time_eligible=%ld "
				       "and time_start=%ld;",
				       mysql_conn->cluster_name, job_table,
				       job_ptr->job_id, submit_time,
				       begin_time, start_time);
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		if (!(result =
		      mysql_db_query_ret(mysql_conn, query, 0))) {
			xfree(query);
			slurm_mutex_unlock(&rollup_lock);
			return SLURM_ERROR;
		}
		xfree(query);
		if ((row = mysql_fetch_row(result))) {
			/* Already recorded with these times: no reroll. */
			mysql_free_result(result);
			debug4("revieved an update for a "
			       "job (%u) already known about",
			       job_ptr->job_id);
			slurm_mutex_unlock(&rollup_lock);
			goto no_rollup_change;
		}
		mysql_free_result(result);

		/* slurm_ctime() output is followed directly by "Job". */
		if (job_ptr->start_time)
			debug("Need to reroll usage from %sJob %u "
			      "from %s started then and we are just "
			      "now hearing about it.",
			      slurm_ctime(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);
		else if (begin_time)
			debug("Need to reroll usage from %sJob %u "
			      "from %s became eligible then and we are just "
			      "now hearing about it.",
			      slurm_ctime(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);
		else
			debug("Need to reroll usage from %sJob %u "
			      "from %s was submitted then and we are just "
			      "now hearing about it.",
			      slurm_ctime(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);

		global_last_rollup = check_time;
		slurm_mutex_unlock(&rollup_lock);

		/* If the times here are later than the daily_rollup
		   or monthly rollup it isn't a big deal since they
		   are always shrunk down to the beginning of each
		   time period.
		*/
		query = xstrdup_printf("update \"%s_%s\" set "
				       "hourly_rollup=%ld, "
				       "daily_rollup=%ld, monthly_rollup=%ld",
				       mysql_conn->cluster_name,
				       last_ran_table, check_time,
				       check_time, check_time);
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
	} else
		slurm_mutex_unlock(&rollup_lock);

no_rollup_change:

	/* Unnamed jobs are stored as "allocation" and always track steps. */
	if (job_ptr->name && job_ptr->name[0])
		jname = slurm_add_slash_to_quotes(job_ptr->name);
	else {
		jname = xstrdup("allocation");
		track_steps = 1;
	}

	if (job_ptr->nodes && job_ptr->nodes[0])
		nodes = job_ptr->nodes;
	else
		nodes = "None assigned";

	if (job_ptr->batch_flag)
		track_steps = 1;

	if (slurmdbd_conf) {
		/* Here block_id and node_inx are taken from the comment and
		 * network fields of the job record. */
		block_id = xstrdup(job_ptr->comment);
		node_cnt = job_ptr->total_nodes;
		node_inx = job_ptr->network;
	} else {
		if (job_ptr->node_bitmap) {
			/* node_inx points into temp_bit (function scope). */
			node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
					   job_ptr->node_bitmap);
		}
#ifdef HAVE_BG
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_BLOCK_ID,
					    &block_id);
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
#else
		node_cnt = job_ptr->total_nodes;
#endif
	}

	/* If there is a start_time get the wckeyid.  If the job is
	 * cancelled before the job starts we also want to grab it. */
	if (job_ptr->assoc_id
	    && (job_ptr->start_time || IS_JOB_CANCELLED(job_ptr)))
		wckeyid = _get_wckeyid(mysql_conn, &job_ptr->wckey,
				       job_ptr->user_id,
				       mysql_conn->cluster_name,
				       job_ptr->assoc_id);

	if (job_ptr->partition)
		partition = slurm_add_slash_to_quotes(job_ptr->partition);

	if (job_ptr->gres_req)
		gres_req = slurm_add_slash_to_quotes(job_ptr->gres_req);

	if (job_ptr->gres_alloc)
		gres_alloc = slurm_add_slash_to_quotes(job_ptr->gres_alloc);

	if (!job_ptr->db_index) {
		/* No record yet: insert one (or refresh a duplicate). */
		if (!begin_time)
			begin_time = submit_time;
		query = xstrdup_printf(
			"insert into \"%s_%s\" "
			"(id_job, id_array_job, id_array_task, "
			"id_assoc, id_qos, id_wckey, id_user, "
			"id_group, nodelist, id_resv, timelimit, "
			"time_eligible, time_submit, time_start, "
			"job_name, track_steps, state, priority, cpus_req, "
			"cpus_alloc, nodes_alloc, mem_req",
			mysql_conn->cluster_name, job_table);

		/* Optional columns; the value and update lists below must
		 * add them under the same conditions and in the same order. */
		if (job_ptr->account)
			xstrcat(query, ", account");
		if (partition)
			xstrcat(query, ", `partition`");
		if (block_id)
			xstrcat(query, ", id_block");
		if (job_ptr->wckey)
			xstrcat(query, ", wckey");
		if (node_inx)
			xstrcat(query, ", node_inx");
		if (gres_req)
			xstrcat(query, ", gres_req");
		if (gres_alloc)
			xstrcat(query, ", gres_alloc");

		xstrfmtcat(query,
			   ") values (%u, %u, %u, %u, %u, %u, %u, %u, "
			   "'%s', %u, %u, %ld, %ld, %ld, "
			   "'%s', %u, %u, %u, %u, %u, %u, %u",
			   job_ptr->job_id, job_ptr->array_job_id,
			   job_ptr->array_task_id, job_ptr->assoc_id,
			   job_ptr->qos_id, wckeyid,
			   job_ptr->user_id, job_ptr->group_id, nodes,
			   job_ptr->resv_id, job_ptr->time_limit,
			   begin_time, submit_time, start_time,
			   jname, track_steps, job_state,
			   job_ptr->priority, job_ptr->details->min_cpus,
			   job_ptr->total_cpus, node_cnt,
			   job_ptr->details->pn_min_memory);

		if (job_ptr->account)
			xstrfmtcat(query, ", '%s'", job_ptr->account);
		if (partition)
			xstrfmtcat(query, ", '%s'", partition);
		if (block_id)
			xstrfmtcat(query, ", '%s'", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, ", '%s'", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, ", '%s'", node_inx);
		if (gres_req)
			xstrfmtcat(query, ", '%s'", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, ", '%s'", gres_alloc);

		/* LAST_INSERT_ID(job_db_inx) makes the insert report the
		 * existing row's id when the key is a duplicate. */
		xstrfmtcat(query,
			   ") on duplicate key update "
			   "job_db_inx=LAST_INSERT_ID(job_db_inx), "
			   "id_wckey=%u, id_user=%u, id_group=%u, "
			   "nodelist='%s', id_resv=%u, timelimit=%u, "
			   "time_submit=%ld, time_start=%ld, "
			   "job_name='%s', track_steps=%u, id_qos=%u, "
			   "state=greatest(state, %u), priority=%u, "
			   "cpus_req=%u, cpus_alloc=%u, nodes_alloc=%u, "
			   "mem_req=%u, id_array_job=%u, id_array_task=%u",
			   wckeyid, job_ptr->user_id, job_ptr->group_id, nodes,
			   job_ptr->resv_id, job_ptr->time_limit,
			   submit_time, start_time,
			   jname, track_steps, job_ptr->qos_id, job_state,
			   job_ptr->priority, job_ptr->details->min_cpus,
			   job_ptr->total_cpus, node_cnt,
			   job_ptr->details->pn_min_memory,
			   job_ptr->array_job_id,
			   job_ptr->array_task_id);

		if (job_ptr->account)
			xstrfmtcat(query, ", account='%s'", job_ptr->account);
		if (partition)
			xstrfmtcat(query, ", `partition`='%s'", partition);
		if (block_id)
			xstrfmtcat(query, ", id_block='%s'", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, ", wckey='%s'", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, ", node_inx='%s'", node_inx);
		if (gres_req)
			xstrfmtcat(query, ", gres_req='%s'", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, ", gres_alloc='%s'", gres_alloc);

		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
	try_again:
		if (!(job_ptr->db_index = mysql_db_insert_ret_id(
			      mysql_conn, query))) {
			/* Retry exactly once after reconnecting. */
			if (!reinit) {
				error("It looks like the storage has gone "
				      "away trying to reconnect");
				mysql_db_close_db_connection(
					mysql_conn);
				/* reconnect */
				check_connection(mysql_conn);
				reinit = 1;
				goto try_again;
			} else
				rc = SLURM_ERROR;
		}
	} else {
		/* The job already has a record: update it in place. */
		query = xstrdup_printf("update \"%s_%s\" set nodelist='%s', ",
				       mysql_conn->cluster_name,
				       job_table, nodes);

		if (job_ptr->account)
			xstrfmtcat(query, "account='%s', ", job_ptr->account);
		if (partition)
			xstrfmtcat(query, "`partition`='%s', ", partition);
		if (block_id)
			xstrfmtcat(query, "id_block='%s', ", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, "wckey='%s', ", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, "node_inx='%s', ", node_inx);
		if (gres_req)
			xstrfmtcat(query, "gres_req='%s', ", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, "gres_alloc='%s', ", gres_alloc);

		/* db_index is a uint32_t, so it is printed with %u. */
		xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, "
			   "cpus_alloc=%u, nodes_alloc=%u, id_qos=%u, "
			   "id_assoc=%u, id_wckey=%u, id_resv=%u, "
			   "timelimit=%u, mem_req=%u, "
			   "id_array_job=%u, id_array_task=%u, "
			   "time_eligible=%ld where job_db_inx=%u",
			   start_time, jname, job_state,
			   job_ptr->total_cpus, node_cnt, job_ptr->qos_id,
			   job_ptr->assoc_id, wckeyid,
			   job_ptr->resv_id, job_ptr->time_limit,
			   job_ptr->details->pn_min_memory,
			   job_ptr->array_job_id,
			   job_ptr->array_task_id,
			   begin_time, job_ptr->db_index);

		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(mysql_conn, query);
	}

	/* nodes and node_inx are not freed: they are never heap copies
	 * owned by this function. */
	xfree(block_id);
	xfree(partition);
	xfree(gres_req);
	xfree(gres_alloc);
	xfree(jname);
	xfree(query);

	/* now we will reset all the steps */
	if (IS_JOB_RESIZING(job_ptr)) {
		/* FIXME : Verify this is still needed */
		if (IS_JOB_SUSPENDED(job_ptr))
			as_mysql_suspend(mysql_conn, job_db_inx, job_ptr);
	}

	return rc;
}