/* Add proctrack container (PAGG) to a job container */ extern int container_p_add_cont(uint32_t job_id, uint64_t cont_id) { #ifdef HAVE_NATIVE_CRAY jid_t cjob_id = cont_id; rid_t resv_id = job_id; int rc; DEF_TIMERS; #endif if (debug_flags & DEBUG_FLAG_JOB_CONT) { info("%s: adding cont(%u.%"PRIu64")", plugin_type, job_id, cont_id); } #ifdef HAVE_NATIVE_CRAY START_TIMER; rc = job_attach_reservation(cjob_id, resv_id, ADD_FLAGS); if (debug_flags & DEBUG_FLAG_TIME_CRAY) { END_TIMER; INFO_LINE("call took: %s", TIME_STR); } else END_TIMER3("container_p_add_cont: job_attach_reservation took", 3000000); if ((rc != 0) && (errno == ENOENT)) { /* Log and retry */ if (debug_flags & DEBUG_FLAG_JOB_CONT) info("%s: add(%u.%"PRIu64"): No reservation found, " "no big deal, this is probably the first time " "this was called. We will just create a new one.", plugin_type, job_id, cont_id); START_TIMER; rc = job_create_reservation(resv_id, CREATE_FLAGS); rc = job_attach_reservation(cjob_id, resv_id, ADD_FLAGS); if (debug_flags & DEBUG_FLAG_TIME_CRAY) { END_TIMER; INFO_LINE("call took: %s", TIME_STR); } else END_TIMER3("container_p_add_cont: " "job_(create&attach)_reservation took", 3000000); } if ((rc == 0) || (errno == EBUSY)) { if (rc) { /* EBUSY - job ID already attached to a reservation * Duplicate adds can be generated by prolog/epilog */ debug2("%s: add(%u.%"PRIu64"): %m", plugin_type, job_id, cont_id); } else if (debug_flags & DEBUG_FLAG_JOB_CONT) _stat_reservation("add", resv_id); return SLURM_SUCCESS; } error("%s: add(%u.%"PRIu64"): %m", plugin_type, job_id, cont_id); return SLURM_ERROR; #else return SLURM_SUCCESS; #endif }
/*
 * Run the topology plugin's build_config operation and time it.
 *
 * RET SLURM_ERROR if slurm_topo_init() reports a failure (< 0), otherwise
 *     whatever the plugin's build_config operation returns.
 */
extern int slurm_topo_build_config(void)
{
	int build_rc = SLURM_ERROR;
	DEF_TIMERS;

	if (slurm_topo_init() >= 0) {
		START_TIMER;
		build_rc = ops.build_config();
		END_TIMER3("slurm_topo_build_config", 20000);
	}

	return build_rc;
}
extern int container_p_delete(uint32_t job_id) { #ifdef HAVE_NATIVE_CRAY rid_t resv_id = job_id; DEF_TIMERS; #endif int rc = 0; int i, found = -1; bool job_id_change = false; if (debug_flags & DEBUG_FLAG_JOB_CONT) info("%s: deleting(%u)", plugin_type, job_id); slurm_mutex_lock(&context_lock); for (i = 0; i < job_id_count; i++) { if (job_id_array[i] == job_id) { job_id_array[i] = 0; job_id_change = true; found = i; } } if (found == -1) info("%s: no job for delete(%u)", plugin_type, job_id); if (job_id_change) _save_state(state_dir); slurm_mutex_unlock(&context_lock); #ifdef HAVE_NATIVE_CRAY START_TIMER; rc = job_end_reservation(resv_id, DELETE_FLAGS); if (debug_flags & DEBUG_FLAG_TIME_CRAY) { END_TIMER; INFO_LINE("call took: %s", TIME_STR); } else END_TIMER3("container_p_delete: job_end_reservation took", 3000000); #endif if (rc == 0) return SLURM_SUCCESS; if ((errno == ENOENT) || (errno == EINPROGRESS) || (errno == EALREADY)) return SLURM_SUCCESS; /* Not fatal error */ error("%s: delete(%u): %m", plugin_type, job_id); return SLURM_ERROR; }
/*
 * Roll up usage for one cluster.
 *
 * Runs the hourly, daily and monthly rollups, in that order, for each period
 * whose aligned end lies after its aligned start. When sent_end is 0 the end
 * times of the periods that were rolled up are then written to the cluster's
 * last_ran table with a single UPDATE.
 *
 * IN pg_conn      - database connection, passed to the rollup functions
 * IN cluster      - cluster name, used to qualify table/function names
 * IN sent_start   - start of the period; 0 means read the start times from
 *                   the last_ran table (calling init_last_ran() when that
 *                   table has no row)
 * IN sent_end     - end of the period; 0 means "now". When non-zero the
 *                   last_ran table is not updated.
 * IN archive_data - passed through to the daily and monthly rollups
 * RET SLURM_SUCCESS, SLURM_ERROR, or the return code of a failing rollup or
 *     of the final update query
 */
static int _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster,
				 time_t sent_start, time_t sent_end,
				 uint16_t archive_data)
{
	DEF_VARS;
	int rc = SLURM_SUCCESS;
	time_t last_hour = sent_start;
	time_t last_day = sent_start;
	time_t last_month = sent_start;
	time_t start_time = 0;
	time_t end_time = 0;
	time_t my_time = sent_end;
	/* End time of each period that was actually rolled up. */
	time_t hour_end = 0, day_end = 0, month_end = 0;
	int did_hour = 0, did_day = 0, did_month = 0;
	struct tm start_tm, end_tm;
	DEF_TIMERS;
	char *ru_fields = "hourly_rollup, daily_rollup, monthly_rollup";
	enum {
		F_HOUR,
		F_DAY,
		F_MONTH,
		F_COUNT
	};

	if (!sent_start) {
		query = xstrdup_printf("SELECT %s FROM %s.%s LIMIT 1",
				       ru_fields, cluster, last_ran_table);
		result = DEF_QUERY_RET;
		if (!result)
			return SLURM_ERROR;

		if (PQntuples(result)) {
			last_hour = atoi(PG_VAL(F_HOUR));
			last_day = atoi(PG_VAL(F_DAY));
			last_month = atoi(PG_VAL(F_MONTH));
			PQclear(result);
		} else {
			time_t now = time(NULL);
			PQclear(result);
			query = xstrdup_printf("SELECT %s.init_last_ran(%ld);",
					       cluster, (long)now);
			result = DEF_QUERY_RET;
			if (!result)
				return SLURM_ERROR;
			last_hour = last_day = last_month = atoi(PG_VAL(0));
			PQclear(result);
			if (last_hour < 0) {
				debug("cluster %s not registered, "
				      "not doing rollup", cluster);
				return SLURM_SUCCESS;
			}
		}
	}

	if (!my_time)
		my_time = time(NULL);

	if (!localtime_r(&last_hour, &start_tm)) {
		error("Couldn't get localtime from hour start %ld",
		      (long)last_hour);
		return SLURM_ERROR;
	}
	if (!localtime_r(&my_time, &end_tm)) {
		error("Couldn't get localtime from hour end %ld",
		      (long)my_time);
		return SLURM_ERROR;
	}

	/* below and anywhere in a rollup plugin when dealing with
	 * epoch times we need to set the tm_isdst = -1 so we don't
	 * have to worry about the time changes.  Not setting it to -1
	 * will cause problems in the day and month with the date change.
	 */

	/* align to hour boundary */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_isdst = -1;
	start_time = mktime(&start_tm);
	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_isdst = -1;
	end_time = mktime(&end_tm);

	/* NOTE(review): this store is not protected by a lock here;
	 * confirm whether global_last_rollup needs one in this plugin. */
	global_last_rollup = end_time;

	if (end_time - start_time > 0) {
		START_TIMER;
		if ((rc = pgsql_hourly_rollup(pg_conn, cluster, start_time,
					      end_time)) != SLURM_SUCCESS)
			return rc;
		END_TIMER3("hourly_rollup", 5000000);
		hour_end = end_time;
		did_hour = 1;
	} else {
		debug2("no need to run this hour %ld <= %ld",
		       (long)end_time, (long)start_time);
	}

	if (!localtime_r(&last_day, &start_tm)) {
		error("Couldn't get localtime from day %ld", (long)last_day);
		return SLURM_ERROR;
	}
	/* align to day boundary; end_tm already has sec/min zeroed above */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_isdst = -1;
	start_time = mktime(&start_tm);
	end_tm.tm_hour = 0;
	end_tm.tm_isdst = -1;
	end_time = mktime(&end_tm);

	if (end_time - start_time > 0) {
		START_TIMER;
		if ((rc = pgsql_daily_rollup(pg_conn, cluster, start_time,
					     end_time, archive_data))
		    != SLURM_SUCCESS)
			return rc;
		END_TIMER2("daily_rollup");
		day_end = end_time;
		did_day = 1;
	} else {
		debug2("no need to run this day %ld <= %ld",
		       (long)end_time, (long)start_time);
	}

	if (!localtime_r(&last_month, &start_tm)) {
		error("Couldn't get localtime from month %ld",
		      (long)last_month);
		return SLURM_ERROR;
	}
	/* align to month boundary */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_mday = 1;
	start_tm.tm_isdst = -1;
	start_time = mktime(&start_tm);
	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_hour = 0;
	end_tm.tm_mday = 1;
	end_tm.tm_isdst = -1;
	end_time = mktime(&end_tm);

	if (end_time - start_time > 0) {
		START_TIMER;
		if ((rc = pgsql_monthly_rollup(pg_conn, cluster, start_time,
					       end_time, archive_data))
		    != SLURM_SUCCESS)
			return rc;
		END_TIMER2("monthly_rollup");
		month_end = end_time;
		did_month = 1;
	} else {
		debug2("no need to run this month %ld <= %ld",
		       (long)end_time, (long)start_time);
	}

	/* Assemble the UPDATE only now, after every step that can return
	 * early, so that no error path can leave an allocated query behind.
	 * If we have a sent_end do not update the last_run_table. */
	if (!sent_end) {
		if (did_hour)
			query = xstrdup_printf(
				"UPDATE %s.%s SET hourly_rollup=%ld",
				cluster, last_ran_table, (long)hour_end);
		if (did_day) {
			if (query)
				xstrfmtcat(query, ", daily_rollup=%ld",
					   (long)day_end);
			else
				query = xstrdup_printf(
					"UPDATE %s.%s SET daily_rollup=%ld",
					cluster, last_ran_table,
					(long)day_end);
		}
		if (did_month) {
			if (query)
				xstrfmtcat(query, ", monthly_rollup=%ld",
					   (long)month_end);
			else
				query = xstrdup_printf(
					"UPDATE %s.%s SET monthly_rollup=%ld",
					cluster, last_ran_table,
					(long)month_end);
		}
	}

	if (query) {
		rc = DEF_QUERY_RET_RC;
	}
	return rc;
}
extern int container_p_create(uint32_t job_id) { #ifdef HAVE_NATIVE_CRAY rid_t resv_id = job_id; int rc; #endif int i, empty = -1, found = -1; DEF_TIMERS; START_TIMER; if (debug_flags & DEBUG_FLAG_JOB_CONT) info("%s: creating(%u)", plugin_type, job_id); slurm_mutex_lock(&context_lock); for (i = 0; i < job_id_count; i++) { if (job_id_array[i] == 0) { empty = i; } else if (job_id_array[i] == job_id) { found = i; break; } } if (found == -1) { if (empty == -1) { empty = job_id_count; job_id_count += 4; job_id_array = xrealloc(job_id_array, sizeof(uint32_t)*job_id_count); } job_id_array[empty] = job_id; _save_state(state_dir); } slurm_mutex_unlock(&context_lock); if (debug_flags & DEBUG_FLAG_TIME_CRAY) { END_TIMER; INFO_LINE("call took: %s", TIME_STR); } else { END_TIMER3("container_p_create: saving state took", 3000000); } #ifdef HAVE_NATIVE_CRAY START_TIMER; rc = job_create_reservation(resv_id, CREATE_FLAGS); if (debug_flags & DEBUG_FLAG_TIME_CRAY) { END_TIMER; INFO_LINE("call took: %s", TIME_STR); } else END_TIMER3("container_p_create: job_create_reservation took", 3000000); if ((rc == 0) || (errno == EEXIST)) { if ((found == -1) && (rc != 0) && (errno == EEXIST)) { error("%s: create(%u): Reservation already exists", plugin_type, job_id); } if (debug_flags & DEBUG_FLAG_JOB_CONT) _stat_reservation("create", resv_id); return SLURM_SUCCESS; } error("%s: create(%u): %m", plugin_type, job_id); return SLURM_ERROR; #else return SLURM_SUCCESS; #endif }
/*
 * Roll up usage for one cluster (thread-style entry point).
 *
 * IN arg - a local_rollup_t describing the work: cluster_name, sent_start,
 *          sent_end, archive_data, the parent's mysql_conn, and the
 *          rolledup / rolledup_lock / rolledup_cond / rc fields used to
 *          report completion. It is xfree()d before returning.
 * RET always NULL; a failure is reported through *local_rollup->rc.
 *
 * Steps, as implemented below:
 *  1. Set up a private mysql_conn_t and check_connection() on it.
 *  2. If sent_start is 0, read the last rollup times from the cluster's
 *     last_ran table; if it has no row, seed it from the earliest matching
 *     event (or "now") and stop if there was no earlier event.
 *  3. Compute hour/day/month aligned [start, end) windows.
 *  4. Run the hourly, daily and monthly rollups for non-empty windows.
 *  5. Unless sent_end was given, update the last_ran table.
 *  6. Commit on success / roll back on failure, close the connection, and
 *     signal the waiter under rolledup_lock.
 */
static void *_cluster_rollup_usage(void *arg)
{
	local_rollup_t *local_rollup = (local_rollup_t *)arg;
	int rc = SLURM_SUCCESS;
	char timer_str[128];
	mysql_conn_t mysql_conn;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	char *query = NULL;
	struct tm start_tm;
	struct tm end_tm;
	time_t my_time = local_rollup->sent_end;
	time_t last_hour = local_rollup->sent_start;
	time_t last_day = local_rollup->sent_start;
	time_t last_month = local_rollup->sent_start;
	time_t hour_start;
	time_t hour_end;
	time_t day_start;
	time_t day_end;
	time_t month_start;
	time_t month_end;
	DEF_TIMERS;

	/* Column names of the last_ran table; the enum below indexes both
	 * this array and the row returned by the select built from it. */
	char *update_req_inx[] = {
		"hourly_rollup",
		"daily_rollup",
		"monthly_rollup"
	};

	enum {
		UPDATE_HOUR,
		UPDATE_DAY,
		UPDATE_MONTH,
		UPDATE_COUNT
	};

	memset(&mysql_conn, 0, sizeof(mysql_conn_t));
	mysql_conn.rollback = 1;
	mysql_conn.conn = local_rollup->mysql_conn->conn;
	slurm_mutex_init(&mysql_conn.lock);

	/* Each thread needs its own connection; we can't use the one
	 * sent from the parent thread. */
	rc = check_connection(&mysql_conn);

	if (rc != SLURM_SUCCESS)
		goto end_it;

	if (!local_rollup->sent_start) {
		char *tmp = NULL;
		int i=0;
		/* Build the comma separated column list for the select. */
		xstrfmtcat(tmp, "%s", update_req_inx[i]);
		for(i=1; i<UPDATE_COUNT; i++) {
			xstrfmtcat(tmp, ", %s", update_req_inx[i]);
		}
		query = xstrdup_printf("select %s from \"%s_%s\"",
				       tmp, local_rollup->cluster_name,
				       last_ran_table);
		xfree(tmp);

		debug4("%d(%s:%d) query\n%s", mysql_conn.conn,
		       THIS_FILE, __LINE__, query);
		if (!(result = mysql_db_query_ret(&mysql_conn, query, 0))) {
			xfree(query);
			rc = SLURM_ERROR;
			goto end_it;
		}

		xfree(query);
		row = mysql_fetch_row(result);
		if (row) {
			last_hour = slurm_atoul(row[UPDATE_HOUR]);
			last_day = slurm_atoul(row[UPDATE_DAY]);
			last_month = slurm_atoul(row[UPDATE_MONTH]);
			mysql_free_result(result);
		} else {
			/* No last_ran row yet: find a starting point. */
			time_t now = time(NULL);
			time_t lowest = now;

			mysql_free_result(result);

			/* Earliest event row with an empty node_name. */
			query = xstrdup_printf(
				"select time_start from \"%s_%s\" "
				"where node_name='' order by "
				"time_start asc limit 1;",
				local_rollup->cluster_name, event_table);
			debug3("%d(%s:%d) query\n%s", mysql_conn.conn,
			       THIS_FILE, __LINE__, query);
			if (!(result = mysql_db_query_ret(
				      &mysql_conn, query, 0))) {
				xfree(query);
				rc = SLURM_ERROR;
				goto end_it;
			}
			xfree(query);
			if ((row = mysql_fetch_row(result))) {
				time_t check = slurm_atoul(row[0]);
				if (check < lowest)
					lowest = check;
			}
			mysql_free_result(result);

			/* If we don't have any events like adding a
			 * cluster this will not work correctly, so we
			 * will insert now as a starting point.
			 */
			query = xstrdup_printf(
				"insert into \"%s_%s\" "
				"(hourly_rollup, daily_rollup, monthly_rollup) "
				"values (%ld, %ld, %ld);",
				local_rollup->cluster_name, last_ran_table,
				lowest, lowest, lowest);

			debug3("%d(%s:%d) query\n%s", mysql_conn.conn,
			       THIS_FILE, __LINE__, query);
			rc = mysql_db_query(&mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				rc = SLURM_ERROR;
				goto end_it;
			}

			/* lowest was not lowered by an event row, so there
			 * is nothing to roll up; the insert above is still
			 * committed at end_it because rc is success. */
			if (lowest == now) {
				debug("Cluster %s not registered, "
				      "not doing rollup",
				      local_rollup->cluster_name);
				rc = SLURM_SUCCESS;
				goto end_it;
			}

			last_hour = last_day = last_month = lowest;
		}
	}

	/* No explicit end given: roll up to the current time. */
	if (!my_time)
		my_time = time(NULL);

	/* test month gap */
/* 	last_hour = 1212299999; */
/* 	last_day = 1212217200; */
/* 	last_month = 1212217200; */
/* 	my_time = 1212307200; */

/* 	last_hour = 1211475599; */
/* 	last_day = 1211475599; */
/* 	last_month = 1211475599; */

//	last_hour = 1211403599;
	//	last_hour = 1206946800;
//	last_day = 1207033199;
//	last_day = 1197033199;
//	last_month = 1204358399;

	if (!localtime_r(&last_hour, &start_tm)) {
		error("Couldn't get localtime from hour start %ld", last_hour);
		rc = SLURM_ERROR;
		goto end_it;
	}

	if (!localtime_r(&my_time, &end_tm)) {
		error("Couldn't get localtime from hour end %ld", my_time);
		rc = SLURM_ERROR;
		goto end_it;
	}

	/* Below and anywhere in a rollup plugin when dealing with
	 * epoch times we need to set the tm_isdst = -1 so we don't
	 * have to worry about the time changes.  Not setting it to -1
	 * will cause problems in the day and month with the date change.
	 */

	/* set up the hour period: truncate both ends to the hour */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_isdst = -1;
	hour_start = mktime(&start_tm);

	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_isdst = -1;
	hour_end = mktime(&end_tm);

/* 	info("hour start %s", slurm_ctime(&hour_start)); */
/* 	info("hour end %s", slurm_ctime(&hour_end)); */
/* 	info("diff is %d", hour_end-hour_start); */

	slurm_mutex_lock(&rollup_lock);
	global_last_rollup = hour_end;
	slurm_mutex_unlock(&rollup_lock);

	/* set up the day period */
	if (!localtime_r(&last_day, &start_tm)) {
		error("Couldn't get localtime from day %ld", last_day);
		rc = SLURM_ERROR;
		goto end_it;
	}

	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_isdst = -1;
	day_start = mktime(&start_tm);

	/* end_tm is reused from the hour step, so its sec/min are already
	 * zero; only the hour needs clearing here. */
	end_tm.tm_hour = 0;
	end_tm.tm_isdst = -1;
	day_end = mktime(&end_tm);

/* 	info("day start %s", slurm_ctime(&day_start)); */
/* 	info("day end %s", slurm_ctime(&day_end)); */
/* 	info("diff is %d", day_end-day_start); */

	/* set up the month period */
	if (!localtime_r(&last_month, &start_tm)) {
		error("Couldn't get localtime from month %ld", last_month);
		rc = SLURM_ERROR;
		goto end_it;
	}

	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_mday = 1;
	start_tm.tm_isdst = -1;
	month_start = mktime(&start_tm);

	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_hour = 0;
	end_tm.tm_mday = 1;
	end_tm.tm_isdst = -1;
	month_end = mktime(&end_tm);

/* 	info("month start %s", slurm_ctime(&month_start)); */
/* 	info("month end %s", slurm_ctime(&month_end)); */
/* 	info("diff is %d", month_end-month_start); */

	/* Run the rollups; any failure jumps to the rollback path. */
	if ((hour_end - hour_start) > 0) {
		START_TIMER;
		rc = as_mysql_hourly_rollup(&mysql_conn,
					    local_rollup->cluster_name,
					    hour_start,
					    hour_end,
					    local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "hourly_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	if ((day_end - day_start) > 0) {
		START_TIMER;
		rc = as_mysql_daily_rollup(&mysql_conn,
					   local_rollup->cluster_name,
					   day_start,
					   day_end,
					   local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "daily_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	if ((month_end - month_start) > 0) {
		START_TIMER;
		rc = as_mysql_monthly_rollup(&mysql_conn,
					     local_rollup->cluster_name,
					     month_start,
					     month_end,
					     local_rollup->archive_data);
		snprintf(timer_str, sizeof(timer_str),
			 "monthly_rollup for %s", local_rollup->cluster_name);
		END_TIMER3(timer_str, 5000000);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	}

	/* Build one update statement covering every period that was rolled
	 * up. query is NULL on entry to this section, so the first period
	 * that applies starts the statement and later ones append to it. */
	if ((hour_end - hour_start) > 0) {
		/* If we have a sent_end do not update the last_run_table */
		if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set hourly_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, hour_end);
	} else
		debug2("No need to roll cluster %s this hour %ld <= %ld",
		       local_rollup->cluster_name, hour_end, hour_start);

	if ((day_end - day_start) > 0) {
		if (query && !local_rollup->sent_end)
			xstrfmtcat(query, ", daily_rollup=%ld", day_end);
		else if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set daily_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, day_end);
	} else
		debug2("No need to roll cluster %s this day %ld <= %ld",
		       local_rollup->cluster_name, day_end, day_start);

	if ((month_end - month_start) > 0) {
		if (query && !local_rollup->sent_end)
			xstrfmtcat(query, ", monthly_rollup=%ld", month_end);
		else if (!local_rollup->sent_end)
			query = xstrdup_printf(
				"update \"%s_%s\" set monthly_rollup=%ld",
				local_rollup->cluster_name,
				last_ran_table, month_end);
	} else
		debug2("No need to roll cluster %s this month %ld <= %ld",
		       local_rollup->cluster_name, month_end, month_start);

	if (query) {
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn.conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(&mysql_conn, query);
		xfree(query);
	}
end_it:
	/* Commit everything done on this connection, or roll it back. */
	if (rc == SLURM_SUCCESS) {
		if (mysql_db_commit(&mysql_conn)) {
			error("Couldn't commit rollup of cluster %s",
			      local_rollup->cluster_name);
			rc = SLURM_ERROR;
		}
	} else {
		error("Cluster %s rollup failed", local_rollup->cluster_name);
		if (mysql_db_rollback(&mysql_conn))
			error("rollback failed");
	}

	mysql_db_close_db_connection(&mysql_conn);
	slurm_mutex_destroy(&mysql_conn.lock);

	/* Report completion: bump the shared counter, record the first
	 * failure seen (never overwrite an earlier error), and signal the
	 * condition variable. */
	slurm_mutex_lock(local_rollup->rolledup_lock);
	(*local_rollup->rolledup)++;
	if ((rc != SLURM_SUCCESS) && ((*local_rollup->rc) == SLURM_SUCCESS))
		(*local_rollup->rc) = rc;
	pthread_cond_signal(local_rollup->rolledup_cond);
	slurm_mutex_unlock(local_rollup->rolledup_lock);
	xfree(local_rollup);

	return NULL;
}