/*
 * as_pg_add_clusters - add clusters
 *
 * For every record in cluster_list this validates the name, inserts the
 * cluster row, creates the per-cluster tables, adds the cluster root
 * association, records a transaction entry and finally adds the default
 * "root" user association.
 *
 * Records with a missing name, a name containing '.', or a name that is
 * already in the database are logged and skipped.  A failure while
 * inserting the cluster, creating its tables or adding its root
 * association aborts the whole loop and the connection is reset.
 *
 * IN pg_conn: database connection
 * IN uid: user performing the add operation
 * IN cluster_list: clusters to add
 * RET: error code
 */
extern int
as_pg_add_clusters(pgsql_conn_t *pg_conn, uint32_t uid, List cluster_list)
{
	ListIterator itr = NULL;
	int rc = SLURM_SUCCESS, added = 0;
	slurmdb_cluster_rec_t *object = NULL;
	time_t now = time(NULL);
	List assoc_list = NULL;
	slurmdb_association_rec_t *assoc = NULL;
	/* NOTE: "query" and "pg_conn" must keep these names; the
	 * DEF_QUERY_RET_RC macro (defined elsewhere) appears to use them
	 * implicitly -- confirm against its definition. */
	char *txn_info = NULL, *query = NULL, *user_name = NULL;

	if (check_db_connection(pg_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;

	assoc_list = list_create(slurmdb_destroy_association_rec);
	user_name = uid_to_string((uid_t) uid);

	itr = list_iterator_create(cluster_list);
	while ((object = list_next(itr))) {
		if (!object->name) {
			error("as/pg: add_clusters: We need a cluster "
			      "name to add.");
			rc = SLURM_ERROR;
			continue;
		}
		if (strchr(object->name, '.')) {
			error("as/pg: add_clusters: invalid cluster name %s",
			      object->name);
			rc = SLURM_ERROR;
			continue;
		}
		if (cluster_in_db(pg_conn, object->name)) {
			error("cluster %s already added", object->name);
			rc = SLURM_ERROR;
			continue;
		}

		query = xstrdup_printf(
			"SELECT public.add_cluster("
			"(%ld, %ld, 0, '%s', '', 0, 0, %u, 1, 0, 0));",
			(long)now, (long)now, object->name,
			object->classification);
		rc = DEF_QUERY_RET_RC;
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add cluster %s", object->name);
			added = 0; /* rollback modification to DB */
			break;
		}

		rc = _create_cluster_tables(pg_conn, object->name);
		if (rc != SLURM_SUCCESS) {
			error("Failed creating cluster tables for %s",
			      object->name);
			added = 0;
			break;
		}

		/* add root account assoc: <'cluster', 'root', '', ''> */
		rc = add_cluster_root_assoc(pg_conn, now, object, &txn_info);
		if (rc != SLURM_SUCCESS) {
			/* Propagate the failure: the loop is abandoned and
			 * the connection reset below, so the caller must not
			 * be told SLURM_SUCCESS.  Also release whatever the
			 * helper may already have stored in txn_info. */
			xfree(txn_info);
			added = 0;
			break;
		}

		if (add_txn(pg_conn, now, "", DBD_ADD_CLUSTERS, object->name,
			    user_name, txn_info) != SLURM_SUCCESS) {
			error("as/pg: add_cluster: couldn't add txn");
		} else {
			added++;
		}
		xfree(txn_info);

		/* Add user root by default to run from the root
		 * association.  This gets popped off so we need to
		 * read it every time here.
		 */
		assoc = xmalloc(sizeof(slurmdb_association_rec_t));
		slurmdb_init_association_rec(assoc, 0);
		list_append(assoc_list, assoc);

		assoc->cluster = xstrdup(object->name);
		assoc->user = xstrdup("root");
		assoc->acct = xstrdup("root");

		if (acct_storage_p_add_associations(pg_conn, uid, assoc_list)
		    == SLURM_ERROR) {
			error("Problem adding root user association");
			rc = SLURM_ERROR;
		}
		list_flush(assoc_list); /* do not add it again, in case not popped */
	}
	list_iterator_destroy(itr);
	xfree(user_name);
	list_destroy(assoc_list);

	if (!added) {
		/* nothing was recorded (or a fatal step failed): reset the
		 * connection instead of flagging a cluster change */
		reset_pgsql_conn(pg_conn);
	} else {
		/* when loading sacctmgr cfg file, get_assoc will be
		   called before commit */
		pg_conn->cluster_changed = 1;
	}

	return rc;
}
/* * js_pg_job_start - load into the storage the start of a job * * IN pg_conn: database connection * IN cluster_name: cluster of the job * IN job_ptr: job just started * RET: error code */ extern int js_pg_job_start(pgsql_conn_t *pg_conn, struct job_record *job_ptr) { int rc=SLURM_SUCCESS, track_steps = 0, reinit = 0; char *jname = NULL, *nodes = NULL, *node_inx = NULL; char *block_id = NULL, *rec = NULL, *query = NULL; time_t begin_time, check_time, start_time, submit_time; int job_state, node_cnt = 0; uint32_t wckeyid = 0; if ((!job_ptr->details || !job_ptr->details->submit_time) && !job_ptr->resize_time) { error("as/pg: job_start: Not inputing this job, " "it has no submit time."); return SLURM_ERROR; } if (check_db_connection(pg_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; if (! cluster_in_db(pg_conn, pg_conn->cluster_name) ) { error("cluster %s not in db", pg_conn->cluster_name); return SLURM_ERROR; } debug3("as/pg: job_start() called"); job_state = job_ptr->job_state; /* Since we need a new db_inx make sure the old db_inx * removed. This is most likely the only time we are going to * be notified of the change also so make the state without * the resize. */ if (IS_JOB_RESIZING(job_ptr)) { /* If we have a db_index lets end the previous record. */ if (job_ptr->db_index) js_pg_job_complete(pg_conn, job_ptr); else error("We don't have a db_index for job %u, " "this should never happen.", job_ptr->job_id); job_state &= (~JOB_RESIZING); job_ptr->db_index = 0; } job_state &= JOB_STATE_BASE; if (job_ptr->resize_time) { begin_time = job_ptr->resize_time; submit_time = job_ptr->resize_time; start_time = job_ptr->resize_time; } else { begin_time = job_ptr->details->begin_time; submit_time = job_ptr->details->submit_time; start_time = job_ptr->start_time; } /* See what we are hearing about here if no start time. If * this job latest time is before the last roll up we will * need to reset it to look at this job. 
*/ if (start_time) check_time = start_time; else if (begin_time) check_time = begin_time; else check_time = submit_time; slurm_mutex_lock(&usage_rollup_lock); if (check_time < global_last_rollup) { PGresult *result = NULL; /* check to see if we are hearing about this time for the * first time. */ query = xstrdup_printf( "SELECT job_db_inx FROM %s.%s WHERE id_job=%u AND " "time_submit=%ld AND time_eligible=%ld AND time_start=%ld", pg_conn->cluster_name, job_table, job_ptr->job_id, submit_time, begin_time, start_time); result = DEF_QUERY_RET; if (!result) { slurm_mutex_unlock(&usage_rollup_lock); return SLURM_ERROR; } if (PQntuples(result) != 0) { PQclear(result); debug4("revieved an update for a " "job (%u) already known about", job_ptr->job_id); slurm_mutex_unlock(&usage_rollup_lock); goto no_rollup_change; } PQclear(result); if (job_ptr->start_time) debug("Need to reroll usage from %s Job %u " "from %s started then and we are just " "now hearing about it.", ctime(&check_time), job_ptr->job_id, pg_conn->cluster_name); else if (begin_time) debug("Need to reroll usage from %s Job %u " "from %s became eligible then and we are just " "now hearing about it.", ctime(&check_time), job_ptr->job_id, pg_conn->cluster_name); else debug("Need to reroll usage from %s Job %u " "from %s was submitted then and we are just " "now hearing about it.", ctime(&check_time), job_ptr->job_id, pg_conn->cluster_name); global_last_rollup = check_time; slurm_mutex_unlock(&usage_rollup_lock); query = xstrdup_printf("UPDATE %s.%s SET hourly_rollup=%ld, " "daily_rollup=%ld, monthly_rollup=%ld", pg_conn->cluster_name, last_ran_table, check_time, check_time, check_time); rc = DEF_QUERY_RET_RC; } else slurm_mutex_unlock(&usage_rollup_lock); no_rollup_change: if (job_ptr->name && job_ptr->name[0]) jname = xstrdup(job_ptr->name); else { jname = xstrdup("allocation"); track_steps = 1; } if (job_ptr->nodes && job_ptr->nodes[0]) nodes = job_ptr->nodes; else nodes = "None assigned"; if 
(job_ptr->batch_flag) track_steps = 1; if (slurmdbd_conf) { block_id = xstrdup(job_ptr->comment); node_cnt = job_ptr->total_nodes; node_inx = job_ptr->network; } else { char temp_bit[BUF_SIZE]; if (job_ptr->node_bitmap) { node_inx = bit_fmt(temp_bit, sizeof(temp_bit), job_ptr->node_bitmap); } #ifdef HAVE_BG select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_BLOCK_ID, &block_id); select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_NODE_CNT, &node_cnt); #else node_cnt = job_ptr->total_nodes; #endif } /* If there is a start_time get the wckeyid. If the job is * cancelled before the job starts we also want to grab it. */ if (job_ptr->assoc_id && (job_ptr->start_time || IS_JOB_CANCELLED(job_ptr))) wckeyid = get_wckeyid(pg_conn, &job_ptr->wckey, job_ptr->user_id, pg_conn->cluster_name, job_ptr->assoc_id); if (!job_ptr->db_index) { if (!begin_time) begin_time = submit_time; rec = xstrdup_printf( "(0, 0, '%s', '%s', %d, %d, 0, '%s', " "%d, '%s', %d, %d, %d, %d, %d, %d, 0, " "%d, %ld, %ld, %ld, 0, 0, " "%d, '%s', '%s', %d, %d, '%s', %d)", /* job_db_inx=0, not used */ /* deleted=0 */ job_ptr->account ?: "", /* account */ job_ptr->partition ?: "", /* partition */ (int)job_ptr->details->min_cpus, /* cpus_req */ (int)job_ptr->total_cpus, /* cpus_alloc */ /* exit_code=0 */ jname, /* job_name */ (int)job_ptr->assoc_id, /* id_assoc */ block_id ?: "", /* id_block */ (int)job_ptr->job_id, /* id_job */ (int)job_ptr->qos_id, /* id_qos */ (int)job_ptr->resv_id, /* id_resv */ (int)wckeyid, /* id_wckey */ (int)job_ptr->user_id, /* uid */ (int)job_ptr->group_id, /* gid */ /* kill_requid=0 */ (int)job_ptr->time_limit, /* timelimit */ submit_time, /* time_submit */ begin_time, /* time_eligible */ start_time, /* time_start */ /* time_end=0 */ /* time_suspended=0 */ (int)node_cnt, /* nodes_alloc */ nodes ?: "", /* nodelist */ node_inx ?: "", /* node_inx */ (int)job_ptr->priority, /* priority */ (int)job_state, /* state */ job_ptr->wckey ?: "", /* wckey */ 
(int)track_steps); query = xstrdup_printf("SELECT %s.add_job_start(%s);", pg_conn->cluster_name, rec); xfree(rec); try_again: DEBUG_QUERY; job_ptr->db_index = pgsql_query_ret_id(pg_conn->db_conn, query); if (!job_ptr->db_index) { if (!reinit) { error("It looks like the storage has gone " "away trying to reconnect"); check_db_connection(pg_conn); reinit = 1; goto try_again; } else rc = SLURM_ERROR; } xfree(query); } else {