Beispiel #1
0
/*
 * sacctmgr_archive_dump - parse the archive options given on the command
 *	line, validate the local archive directory / script and ask the
 *	accounting storage plugin to archive (and purge) records.
 * IN argc - number of entries in argv
 * IN argv - option strings; a leading "Where" or "Set" keyword is skipped
 * RET SLURM_SUCCESS on success, SLURM_ERROR otherwise.  The archive
 *	condition allocated here is released on every exit path.
 */
extern int sacctmgr_archive_dump(int argc, char *argv[])
{
	int rc = SLURM_SUCCESS;
	int saved_errno = 0;	/* errno value to hand back to the caller */
	slurmdb_archive_cond_t *arch_cond =
		xmalloc(sizeof(slurmdb_archive_cond_t));
	int i = 0;
	struct stat st;

	for (i = 0; i < argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))
		    || !strncasecmp(argv[i], "Set", MAX(command_len, 3)))
			i++;
		_set_cond(&i, argc, argv, arch_cond);
	}

	/* Any purge setting still zero after parsing is replaced by NO_VAL */
	if (!arch_cond->purge_event)
		arch_cond->purge_event = NO_VAL;
	if (!arch_cond->purge_job)
		arch_cond->purge_job = NO_VAL;
	if (!arch_cond->purge_resv)
		arch_cond->purge_resv = NO_VAL;
	if (!arch_cond->purge_step)
		arch_cond->purge_step = NO_VAL;
	if (!arch_cond->purge_suspend)
		arch_cond->purge_suspend = NO_VAL;

	if (exit_code) {
		rc = SLURM_ERROR;
		goto end_it;
	}

	if (arch_cond->archive_dir) {
		if (stat(arch_cond->archive_dir, &st) < 0) {
			exit_code = errno;
			saved_errno = exit_code;
			fprintf(stderr, " dump: Failed to stat %s: %m\n "
				"Note: For archive dump, "
				"the directory must be on "
				"the calling host.\n",
				arch_cond->archive_dir);
			rc = SLURM_ERROR;
			goto end_it;
		}

		/* S_ISDIR compares the whole file-type field; a plain
		 * bit test against S_IFDIR also matches block devices. */
		if (!S_ISDIR(st.st_mode)) {
			saved_errno = EACCES;
			fprintf(stderr, " dump: "
				"archive dir %s isn't a directory\n",
				arch_cond->archive_dir);
			rc = SLURM_ERROR;
			goto end_it;
		}

		if (access(arch_cond->archive_dir, W_OK) < 0) {
			saved_errno = EACCES;
			fprintf(stderr, " dump: "
				"archive dir %s is not writable\n",
				arch_cond->archive_dir);
			rc = SLURM_ERROR;
			goto end_it;
		}
	}

	if (arch_cond->archive_script) {
		if (stat(arch_cond->archive_script, &st) < 0) {
			exit_code = errno;
			saved_errno = exit_code;
			fprintf(stderr, " dump: Failed to stat %s: %m\n "
				"Note: For archive dump, the script must be on "
				"the calling host.\n",
				arch_cond->archive_script);
			rc = SLURM_ERROR;
			goto end_it;
		}

		/* S_ISREG for the same reason as S_ISDIR above: the
		 * S_IFREG bit is also set for symlinks and sockets. */
		if (!S_ISREG(st.st_mode)) {
			saved_errno = EACCES;
			fprintf(stderr, " dump: "
				"archive script %s isn't a regular file\n",
				arch_cond->archive_script);
			rc = SLURM_ERROR;
			goto end_it;
		}

		if (access(arch_cond->archive_script, X_OK) < 0) {
			saved_errno = EACCES;
			fprintf(stderr, " dump: "
				"archive script %s is not executable\n",
				arch_cond->archive_script);
			rc = SLURM_ERROR;
			goto end_it;
		}
	}

	rc = jobacct_storage_g_archive(db_conn, arch_cond);
	if (rc == SLURM_SUCCESS) {
		if (commit_check("Would you like to commit changes?")) {
			acct_storage_g_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	} else {
		exit_code = 1;
		fprintf(stderr, " Problem dumping archive: %s\n",
			slurm_strerror(rc));
		rc = SLURM_ERROR;
	}

end_it:
	slurmdb_destroy_archive_cond(arch_cond);
	/* Set errno last so neither the message output nor the cleanup
	 * above can overwrite the value reported to the caller. */
	if (saved_errno)
		errno = saved_errno;

	return rc;
}
Beispiel #2
0
/*
 * specific_info_resv - refresh the reservation view shown in a popup window.
 *	Fetches reservation data, rebuilds the display widget when needed,
 *	filters the reservations according to the popup's page type and
 *	search data, colors the matching nodes in the popup grid and updates
 *	the tree view.
 * IN/OUT popup_win - popup to refresh; its spec_info view/display_widget may
 *	be replaced, and toggled/force_refresh are cleared at end_it.
 */
extern void specific_info_resv(popup_info_t *popup_win)
{
	int resv_error_code = SLURM_SUCCESS;
	static reserve_info_msg_t *resv_info_ptr = NULL;
	static reserve_info_t *resv_ptr = NULL;
	specific_info_t *spec_info = popup_win->spec_info;
	sview_search_info_t *search_info = spec_info->search_info;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	List resv_list = NULL;
	List send_resv_list = NULL;
	sview_resv_info_t *sview_resv_info_ptr = NULL;
	int j = 0;
	hostset_t hostset = NULL;
	ListIterator itr = NULL;

	if (!spec_info->display_widget) {
		setup_popup_info(popup_win, display_data_resv, SORTID_CNT);
	}

	/* The popup was toggled: drop the widget so it gets rebuilt below */
	if (spec_info->display_widget && popup_win->toggled) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
		goto display_it;
	}

	if ((resv_error_code =
	     get_new_info_resv(&resv_info_ptr, popup_win->force_refresh))
	    == SLURM_NO_CHANGE_IN_DATA) {
		if (!spec_info->display_widget || spec_info->view == ERROR_VIEW)
			goto display_it;
	} else if (resv_error_code != SLURM_SUCCESS) {
		/* Already showing the error label, nothing more to do */
		if (spec_info->view == ERROR_VIEW)
			goto end_it;
		spec_info->view = ERROR_VIEW;
		if (spec_info->display_widget)
			gtk_widget_destroy(spec_info->display_widget);
		/* Bounded write: the error text must not overrun error_char */
		snprintf(error_char, sizeof(error_char),
			 "get_new_info_resv: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(popup_win->table,
					  label,
					  0, 1, 0, 1);
		gtk_widget_show(label);
		spec_info->display_widget = gtk_widget_ref(label);
		goto end_it;
	}

display_it:

	resv_list = _create_resv_info_list(resv_info_ptr);

	if (!resv_list)
		return;

	/* Replace a stale error label with a real view */
	if (spec_info->view == ERROR_VIEW && spec_info->display_widget) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
	}
	if (spec_info->type != INFO_PAGE && !spec_info->display_widget) {
		tree_view = create_treeview(local_display_data,
					    &popup_win->grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		spec_info->display_widget =
			gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(popup_win->table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, popup_win->display_data,
				 SORTID_CNT, SORTID_TIME_START, SORTID_COLOR);
	}

	setup_popup_grid_list(popup_win);

	spec_info->view = INFO_VIEW;
	if (spec_info->type == INFO_PAGE) {
		_display_info_resv(resv_list, popup_win);
		goto end_it;
	}

	/* just linking to another list, don't free the inside, just
	   the list */
	send_resv_list = list_create(NULL);
	itr = list_iterator_create(resv_list);
	while ((sview_resv_info_ptr = list_next(itr))) {
		resv_ptr = sview_resv_info_ptr->resv_ptr;
		switch(spec_info->type) {
		case PART_PAGE:
		case BLOCK_PAGE:
		case NODE_PAGE:
			/* Keep only reservations whose nodes overlap the
			 * hosts named in the search data */
			if (!resv_ptr->node_list)
				continue;

			if (!(hostset = hostset_create(
				      search_info->gchar_data)))
				continue;
			if (!hostset_intersects(hostset, resv_ptr->node_list)) {
				hostset_destroy(hostset);
				continue;
			}
			hostset_destroy(hostset);
			break;
		case JOB_PAGE:
			/* Keep only the reservation named in the search data */
			if (strcmp(resv_ptr->name,
				   search_info->gchar_data))
				continue;
			break;
		case RESV_PAGE:
			switch(search_info->search_type) {
			case SEARCH_RESERVATION_NAME:
				if (!search_info->gchar_data)
					continue;

				if (strcmp(resv_ptr->name,
					   search_info->gchar_data))
					continue;
				break;
			default:
				continue;
			}
			break;
		default:
			g_print("Unknown type %d\n", spec_info->type);
			continue;
		}
		list_push(send_resv_list, sview_resv_info_ptr);
		/* node_inx is walked two entries at a time until a negative
		 * entry is reached; each pair is handed to
		 * change_grid_color() */
		j = 0;
		while (resv_ptr->node_inx[j] >= 0) {
			change_grid_color(
				popup_win->grid_button_list,
				resv_ptr->node_inx[j],
				resv_ptr->node_inx[j+1],
				sview_resv_info_ptr->color_inx,
				true, 0);
			j += 2;
		}
	}
	list_iterator_destroy(itr);
	post_setup_popup_grid_list(popup_win);

	_update_info_resv(send_resv_list,
			  GTK_TREE_VIEW(spec_info->display_widget));
	list_destroy(send_resv_list);
end_it:
	popup_win->toggled = 0;
	popup_win->force_refresh = 0;

	return;
}
Beispiel #3
0
/*
 * _cancel_job_id - thread body that signals or terminates a single job.
 * IN ci - job_cancel_info_t describing the job; this function frees it
 *	(including job_id_str) before returning.
 * RET always NULL.  The outcome is folded into *cancel_info->rc (maximum of
 *	the previous value and this job's error code) under
 *	num_active_threads_lock, and the active thread counter is decremented
 *	and signalled.
 */
static void *
_cancel_job_id (void *ci)
{
	int error_code = SLURM_SUCCESS, i;
	int kill_errno = 0;	/* errno captured right after the kill RPC */
	job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci;
	bool sig_set = true;
	uint16_t flags = 0;
	char *job_type = "";
	DEF_TIMERS;

	/* No explicit signal requested: terminate the job with SIGKILL */
	if (cancel_info->sig == (uint16_t) NO_VAL) {
		cancel_info->sig = SIGKILL;
		sig_set = false;
	}
	if (opt.batch) {
		flags |= KILL_JOB_BATCH;
		job_type = "batch ";
	}
	if (opt.full) {
		flags |= KILL_FULL_JOB;
		job_type = "full ";
	}
	if (cancel_info->array_flag)
		flags |= KILL_JOB_ARRAY;

	/* Build the job id string if the caller did not provide one */
	if (!cancel_info->job_id_str) {
		if (cancel_info->array_job_id &&
		    (cancel_info->array_task_id == INFINITE)) {
			xstrfmtcat(cancel_info->job_id_str, "%u_*",
				   cancel_info->array_job_id);
		} else if (cancel_info->array_job_id) {
			xstrfmtcat(cancel_info->job_id_str, "%u_%u",
				   cancel_info->array_job_id,
				   cancel_info->array_task_id);
		} else {
			xstrfmtcat(cancel_info->job_id_str, "%u",
				   cancel_info->job_id);
		}
	}

	if (!sig_set) {
		verbose("Terminating %sjob %s", job_type,
			cancel_info->job_id_str);
	} else {
		verbose("Signal %u to %sjob %s", cancel_info->sig, job_type,
			cancel_info->job_id_str);
	}

	for (i = 0; i < MAX_CANCEL_RETRY; i++) {
		_add_delay();
		START_TIMER;
		error_code = slurm_kill_job2(cancel_info->job_id_str,
					     cancel_info->sig, flags);
		/* Save errno before the timer, mutex, logging or sleep
		 * calls below get a chance to overwrite it. */
		kill_errno = errno;
		END_TIMER;
		slurm_mutex_lock(&max_delay_lock);
		max_resp_time = MAX(max_resp_time, DELTA_TIMER);
		slurm_mutex_unlock(&max_delay_lock);

		/* Retry only while the job is reported as in transition */
		if ((error_code == 0) ||
		    (kill_errno != ESLURM_TRANSITION_STATE_NO_UPDATE))
			break;
		verbose("Job is in transistional state, retrying");
		sleep(5 + i);
	}
	if (error_code) {
		error_code = kill_errno;
		if ((opt.verbose > 0) ||
		    ((error_code != ESLURM_ALREADY_DONE) &&
		     (error_code != ESLURM_INVALID_JOB_ID))) {
			error("Kill job error on job id %s: %s",
			      cancel_info->job_id_str,
			      slurm_strerror(error_code));
		}
		if (((error_code == ESLURM_ALREADY_DONE) ||
		     (error_code == ESLURM_INVALID_JOB_ID)) &&
		    (cancel_info->sig == SIGKILL)) {
			error_code = 0;	/* Ignore error if job done */
		}
	}

	/* Purposely free the struct passed in here, so the caller doesn't have
	 * to keep track of it, but don't destroy the mutex and condition
	 * variables contained. */
	slurm_mutex_lock(cancel_info->num_active_threads_lock);
	*(cancel_info->rc) = MAX(*(cancel_info->rc), error_code);
	(*(cancel_info->num_active_threads))--;
	slurm_cond_signal(cancel_info->num_active_threads_cond);
	slurm_mutex_unlock(cancel_info->num_active_threads_lock);

	xfree(cancel_info->job_id_str);
	xfree(cancel_info);
	return NULL;
}
Beispiel #4
0
/*
 * sacctmgr_modify_qos - handle "sacctmgr modify qos".
 * IN argc - number of entries in argv
 * IN argv - option strings; options after "Set" fill the new QOS record,
 *	everything else (with or without a leading "Where") fills the
 *	selection condition.
 * RET SLURM_SUCCESS when records were modified or the user aborted at the
 *	"no conditions" prompt, otherwise SLURM_ERROR (or the result of
 *	sacctmgr_remove_qos_usage() for a raw-usage reset).
 */
extern int sacctmgr_modify_qos(int argc, char *argv[])
{
	slurmdb_qos_cond_t *qos_cond = xmalloc(sizeof(slurmdb_qos_cond_t));
	slurmdb_qos_rec_t *qos = xmalloc(sizeof(slurmdb_qos_rec_t));
	List ret_list = NULL;
	int rc = SLURM_SUCCESS;
	int have_cond = 0, have_rec = 0, modified = 0;
	int i;

	slurmdb_init_qos_rec(qos, 0, NO_VAL);

	for (i = 0; i < argc; i++) {
		int command_len = strlen(argv[i]);
		int is_where = !strncasecmp(argv[i], "Where",
					    MAX(command_len, 5));
		int is_set = !is_where &&
			!strncasecmp(argv[i], "Set", MAX(command_len, 3));

		if (is_set) {
			i++;
			have_rec += _set_rec(&i, argc, argv, NULL, qos);
			continue;
		}

		/* "Where" is optional: bare options are conditions too */
		if (is_where)
			i++;
		have_cond += _set_cond(&i, argc, argv, qos_cond, NULL);
	}

	if (exit_code) {
		rc = SLURM_ERROR;
		goto cleanup;
	}

	if (!have_rec) {
		exit_code = 1;
		fprintf(stderr, " You didn't give me anything to set\n");
		rc = SLURM_ERROR;
		goto cleanup;
	}

	if (!have_cond &&
	    !commit_check("You didn't set any conditions with 'WHERE'.\n"
			  "Are you sure you want to continue?")) {
		printf("Aborted\n");
		goto cleanup;
	}

	/* A usage record on the QOS means "reset raw usage"; zero is the
	 * only value accepted and nothing else is modified in that case. */
	if (qos->usage) {
		if (qos->usage->usage_raw == 0.0) {
			rc = sacctmgr_remove_qos_usage(qos_cond);
		} else {
			error("Raw usage can only be set to 0 (zero)");
			rc = SLURM_ERROR;
		}
		goto cleanup;
	}

	notice_thread_init();

	ret_list = acct_storage_g_modify_qos(db_conn, my_uid, qos_cond, qos);
	if (!ret_list) {
		exit_code = 1;
		fprintf(stderr, " Error with request: %s\n",
			slurm_strerror(errno));
		rc = SLURM_ERROR;
	} else if (!list_count(ret_list)) {
		printf(" Nothing modified\n");
		rc = SLURM_ERROR;
	} else {
		ListIterator itr = list_iterator_create(ret_list);
		char *qos_name;

		printf(" Modified qos...\n");
		for (qos_name = list_next(itr); qos_name;
		     qos_name = list_next(itr))
			printf("  %s\n", qos_name);
		list_iterator_destroy(itr);
		modified = 1;
	}

	if (ret_list)
		list_destroy(ret_list);

	notice_thread_fini();

	/* Only ask about committing when something actually changed */
	if (modified) {
		if (commit_check("Would you like to commit changes?")) {
			acct_storage_g_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	}

cleanup:
	slurmdb_destroy_qos_cond(qos_cond);
	slurmdb_destroy_qos_rec(qos);

	return rc;
}
Beispiel #5
0
/*
 * slurm_allocate_resources_blocking
 *	allocate resources for a job request.  This call will block until
 *	the allocation is granted, or the specified timeout limit is reached.
 * IN req - description of resource allocation request
 * IN timeout - amount of time, in seconds, to wait for a response before
 * 	giving up.
 *	A timeout of zero will wait indefinitely.
 * IN pending_callback - If the allocation cannot be granted immediately,
 *      the controller will put the job in the PENDING state.  If
 *      pending callback is not NULL, it will be called with the job_id
 *      of the pending job as the sole parameter.
 *
 * RET allocation structure on success, NULL on error set errno to
 *	indicate the error (errno will be ETIMEDOUT if the timeout is reached
 *      with no allocation granted)
 * NOTE: free the response using slurm_free_resource_allocation_response_msg()
 */
resource_allocation_response_msg_t *
slurm_allocate_resources_blocking (const job_desc_msg_t *user_req,
				   time_t timeout,
				   void(*pending_callback)(uint32_t job_id))
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	resource_allocation_response_msg_t *resp = NULL;
	char *hostname = NULL;
	uint32_t job_id;
	job_desc_msg_t *req;
	listen_t *listen = NULL;
	int errnum = SLURM_SUCCESS;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	/* make a copy of the user's job description struct so that we
	 * can make changes before contacting the controller */
	req = (job_desc_msg_t *)xmalloc(sizeof(job_desc_msg_t));
	if (req == NULL)
		return NULL;
	memcpy(req, user_req, sizeof(job_desc_msg_t));

	/*
	 * set Node and session id for this request
	 */
	if (req->alloc_sid == NO_VAL)
		req->alloc_sid = getsid(0);

	if (user_req->alloc_node != NULL) {
		req->alloc_node = xstrdup(user_req->alloc_node);
	} else if ((hostname = xshort_hostname()) != NULL) {
		req->alloc_node = hostname;
	} else {
		error("Could not get local hostname,"
		      " forcing immediate allocation mode.");
		req->immediate = 1;
	}

	if (!req->immediate) {
		listen = _create_allocation_response_socket(hostname);
		if (listen == NULL) {
			xfree(req);
			return NULL;
		}
		req->alloc_resp_port = listen->port;
	}

	req_msg.msg_type = REQUEST_RESOURCE_ALLOCATION;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);

	if (rc == SLURM_SOCKET_ERROR) {
		int errnum = errno;
		destroy_forward(&req_msg.forward);
		destroy_forward(&resp_msg.forward);
		if (!req->immediate)
			_destroy_allocation_response_socket(listen);
		xfree(req);
		errno = errnum;
		return NULL;
	}

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0) {
			/* will reach this when the allocation fails */
			errnum = errno;
		} else {
			/* shouldn't get here */
			errnum = -1;
		}
		break;
	case RESPONSE_RESOURCE_ALLOCATION:
		/* Yay, the controller has acknowledged our request!  But did
		   we really get an allocation yet? */
		resp = (resource_allocation_response_msg_t *) resp_msg.data;
		if (resp->node_cnt > 0) {
			/* yes, allocation has been granted */
			errno = SLURM_PROTOCOL_SUCCESS;
		} else if (!req->immediate) {
			if (resp->error_code != SLURM_SUCCESS)
				info("%s", slurm_strerror(resp->error_code));
			/* no, we need to wait for a response */
			job_id = resp->job_id;
			slurm_free_resource_allocation_response_msg(resp);
			if (pending_callback != NULL)
				pending_callback(job_id);
 			resp = _wait_for_allocation_response(job_id, listen,
							     timeout);
			/* If NULL, we didn't get the allocation in
			   the time desired, so just free the job id */
			if ((resp == NULL) && (errno != ESLURM_ALREADY_DONE)) {
				errnum = errno;
				slurm_complete_job(job_id, -1);
			}
		}
		break;
	default:
		errnum = SLURM_UNEXPECTED_MSG_ERROR;
		resp = NULL;
	}

	destroy_forward(&req_msg.forward);
	destroy_forward(&resp_msg.forward);
	if (!req->immediate)
		_destroy_allocation_response_socket(listen);
	xfree(req);
	errno = errnum;
	return resp;
}
Beispiel #6
0
extern int sacctmgr_add_account(int argc, char **argv)
{
	int rc = SLURM_SUCCESS;
	int i=0;
	ListIterator itr = NULL, itr_c = NULL;
	slurmdb_account_rec_t *acct = NULL;
	slurmdb_assoc_rec_t *assoc = NULL;
	slurmdb_assoc_cond_t assoc_cond;
	List name_list = list_create(slurm_destroy_char);
	List cluster_list = list_create(slurm_destroy_char);
	char *cluster = NULL;
	char *name = NULL;
	List acct_list = NULL;
	List assoc_list = NULL;
	List local_assoc_list = NULL;
	List local_account_list = NULL;
	char *acct_str = NULL;
	char *assoc_str = NULL;
	int limit_set = 0;
	slurmdb_account_rec_t *start_acct =
		xmalloc(sizeof(slurmdb_account_rec_t));
	slurmdb_assoc_rec_t *start_assoc =
		xmalloc(sizeof(slurmdb_assoc_rec_t));

	slurmdb_init_assoc_rec(start_assoc, 0);

	for (i = 0; i < argc; i++) {
		int command_len = strlen(argv[i]);
		if (!xstrncasecmp(argv[i], "Where", MAX(command_len, 5))
		    || !xstrncasecmp(argv[i], "Set", MAX(command_len, 3)))
			i++;
		limit_set += _set_rec(&i, argc, argv, name_list, cluster_list,
				      start_acct, start_assoc);
	}
	if (exit_code) {
		slurmdb_destroy_assoc_rec(start_assoc);
		slurmdb_destroy_account_rec(start_acct);
		return SLURM_ERROR;
	}

	if (!name_list || !list_count(name_list)) {
		FREE_NULL_LIST(name_list);
		FREE_NULL_LIST(cluster_list);
		slurmdb_destroy_assoc_rec(start_assoc);
		slurmdb_destroy_account_rec(start_acct);
		exit_code = 1;
		fprintf(stderr, " Need name of account to add.\n");
		return SLURM_SUCCESS;
	} else {
		slurmdb_account_cond_t account_cond;
		memset(&account_cond, 0, sizeof(slurmdb_account_cond_t));
		memset(&assoc_cond, 0, sizeof(slurmdb_assoc_cond_t));

		assoc_cond.acct_list = name_list;
		account_cond.assoc_cond = &assoc_cond;

		local_account_list = slurmdb_accounts_get(
			db_conn, &account_cond);
	}

	if (!local_account_list) {
		exit_code = 1;
		fprintf(stderr, " Problem getting accounts from database.  "
			"Contact your admin.\n");
		FREE_NULL_LIST(name_list);
		FREE_NULL_LIST(cluster_list);
		slurmdb_destroy_assoc_rec(start_assoc);
		slurmdb_destroy_account_rec(start_acct);
		return SLURM_ERROR;
	}

	if (!start_assoc->parent_acct)
		start_assoc->parent_acct = xstrdup("root");

	if (!cluster_list || !list_count(cluster_list)) {
		slurmdb_cluster_rec_t *cluster_rec = NULL;
		List tmp_list =
			slurmdb_clusters_get(db_conn, NULL);
		if (!tmp_list) {
			exit_code=1;
			fprintf(stderr,
				" Problem getting clusters from database.  "
				"Contact your admin.\n");
			FREE_NULL_LIST(name_list);
			FREE_NULL_LIST(cluster_list);
			slurmdb_destroy_assoc_rec(start_assoc);
			slurmdb_destroy_account_rec(start_acct);
			FREE_NULL_LIST(local_account_list);
			return SLURM_ERROR;
		}

		if (!list_count(tmp_list)) {
			exit_code=1;
			fprintf(stderr,
				"  Can't add accounts, no cluster "
				"defined yet.\n"
				" Please contact your administrator.\n");
			FREE_NULL_LIST(name_list);
			FREE_NULL_LIST(cluster_list);
			slurmdb_destroy_assoc_rec(start_assoc);
			slurmdb_destroy_account_rec(start_acct);
			FREE_NULL_LIST(local_account_list);
			return SLURM_ERROR;
		}
		if (!cluster_list)
			list_create(slurm_destroy_char);
		else
			list_flush(cluster_list);

		itr_c = list_iterator_create(tmp_list);
		while((cluster_rec = list_next(itr_c))) {
			list_append(cluster_list, xstrdup(cluster_rec->name));
		}
		list_iterator_destroy(itr_c);
		FREE_NULL_LIST(tmp_list);
	} else if (sacctmgr_validate_cluster_list(cluster_list)
		   != SLURM_SUCCESS) {
		slurmdb_destroy_assoc_rec(start_assoc);
		slurmdb_destroy_account_rec(start_acct);
		FREE_NULL_LIST(local_account_list);

		return SLURM_ERROR;
	}


	acct_list = list_create(slurmdb_destroy_account_rec);
	assoc_list = list_create(slurmdb_destroy_assoc_rec);

	memset(&assoc_cond, 0, sizeof(slurmdb_assoc_cond_t));

	assoc_cond.acct_list = list_create(NULL);
	itr = list_iterator_create(name_list);
	while((name = list_next(itr)))
		list_append(assoc_cond.acct_list, name);
	list_iterator_destroy(itr);
	list_append(assoc_cond.acct_list, start_assoc->parent_acct);

	assoc_cond.cluster_list = cluster_list;
	local_assoc_list = slurmdb_associations_get(
		db_conn, &assoc_cond);
	FREE_NULL_LIST(assoc_cond.acct_list);
	if (!local_assoc_list) {
		exit_code=1;
		fprintf(stderr, " Problem getting associations from database.  "
			"Contact your admin.\n");
		FREE_NULL_LIST(name_list);
		FREE_NULL_LIST(cluster_list);
		slurmdb_destroy_assoc_rec(start_assoc);
		slurmdb_destroy_account_rec(start_acct);
		FREE_NULL_LIST(local_account_list);
		return SLURM_ERROR;
	}

	itr = list_iterator_create(name_list);
	while((name = list_next(itr))) {
		if (!name[0]) {
			exit_code=1;
			fprintf(stderr, " No blank names are "
				"allowed when adding.\n");
			rc = SLURM_ERROR;
			continue;
		}

		acct = NULL;
		if (!sacctmgr_find_account_from_list(local_account_list, name)) {
			acct = xmalloc(sizeof(slurmdb_account_rec_t));
			acct->assoc_list =
				list_create(slurmdb_destroy_assoc_rec);
			acct->name = xstrdup(name);
			if (start_acct->description)
				acct->description =
					xstrdup(start_acct->description);
			else
				acct->description = xstrdup(name);

			if (start_acct->organization)
				acct->organization =
					xstrdup(start_acct->organization);
			else if (xstrcmp(start_assoc->parent_acct, "root"))
				acct->organization =
					xstrdup(start_assoc->parent_acct);
			else
				acct->organization = xstrdup(name);

			xstrfmtcat(acct_str, "  %s\n", name);
			list_append(acct_list, acct);
		}

		itr_c = list_iterator_create(cluster_list);
		while((cluster = list_next(itr_c))) {
			if (sacctmgr_find_account_base_assoc_from_list(
				   local_assoc_list, name, cluster)) {
				//printf(" already have this assoc\n");
				continue;
			}
			if (!sacctmgr_find_account_base_assoc_from_list(
				   local_assoc_list, start_assoc->parent_acct,
				   cluster)) {
				exit_code=1;
				fprintf(stderr, " Parent account '%s' "
					"doesn't exist on "
					"cluster %s\n"
					"        Contact your admin "
					"to add this account.\n",
					start_assoc->parent_acct, cluster);
				continue;
			}

			assoc = xmalloc(sizeof(slurmdb_assoc_rec_t));
			slurmdb_init_assoc_rec(assoc, 0);
			assoc->acct = xstrdup(name);
			assoc->cluster = xstrdup(cluster);
			assoc->def_qos_id = start_assoc->def_qos_id;

			assoc->parent_acct = xstrdup(start_assoc->parent_acct);
			assoc->shares_raw = start_assoc->shares_raw;

			slurmdb_copy_assoc_rec_limits(assoc, start_assoc);

			if (acct)
				list_append(acct->assoc_list, assoc);
			else
				list_append(assoc_list, assoc);
			xstrfmtcat(assoc_str,
				   "  A = %-10.10s"
				   " C = %-10.10s\n",
				   assoc->acct,
				   assoc->cluster);

		}
		list_iterator_destroy(itr_c);
	}
	list_iterator_destroy(itr);
	FREE_NULL_LIST(local_account_list);
	FREE_NULL_LIST(local_assoc_list);


	if (!list_count(acct_list) && !list_count(assoc_list)) {
		printf(" Nothing new added.\n");
		rc = SLURM_ERROR;
		goto end_it;
	} else if (!assoc_str) {
		exit_code=1;
		fprintf(stderr, " No associations created.\n");
		goto end_it;
	}

	if (acct_str) {
		printf(" Adding Account(s)\n%s", acct_str);
		printf(" Settings\n");
		if (start_acct->description)
			printf("  Description     = %s\n",
			       start_acct->description);
		else
			printf("  Description     = %s\n", "Account Name");

		if (start_acct->organization)
			printf("  Organization    = %s\n",
			       start_acct->organization);
		else
			printf("  Organization    = %s\n",
			       "Parent/Account Name");

		xfree(acct_str);
	}

	if (assoc_str) {
		printf(" Associations\n%s", assoc_str);
		xfree(assoc_str);
	}

	if (limit_set) {
		printf(" Settings\n");
		sacctmgr_print_assoc_limits(start_assoc);
	}

	notice_thread_init();
	if (list_count(acct_list))
		rc = slurmdb_accounts_add(db_conn, acct_list);


	if (rc == SLURM_SUCCESS) {
		if (list_count(assoc_list))
			rc = slurmdb_associations_add(db_conn, assoc_list);
	} else {
		exit_code=1;
		fprintf(stderr, " Problem adding accounts: %s\n",
			slurm_strerror(rc));
		rc = SLURM_ERROR;
		notice_thread_fini();
		goto end_it;
	}
	notice_thread_fini();

	if (rc == SLURM_SUCCESS) {
		if (commit_check("Would you like to commit changes?")) {
			slurmdb_connection_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			slurmdb_connection_commit(db_conn, 0);
		}
	} else {
		exit_code=1;
		fprintf(stderr,
			" error: Problem adding account associations: %s\n",
			slurm_strerror(rc));
		rc = SLURM_ERROR;
	}

end_it:
	FREE_NULL_LIST(name_list);
	FREE_NULL_LIST(cluster_list);
	FREE_NULL_LIST(acct_list);
	FREE_NULL_LIST(assoc_list);

	slurmdb_destroy_assoc_rec(start_assoc);
	slurmdb_destroy_account_rec(start_acct);
	return rc;
}
Beispiel #7
0
/*
 * sacctmgr_delete_qos - handle "sacctmgr delete qos".
 * IN argc - number of entries in argv
 * IN argv - option strings describing which QOS to remove; a leading
 *	"Where" or "Set" keyword is skipped
 * RET SLURM_SUCCESS when the removal request returned records (whether the
 *	change was then committed or discarded), SLURM_ERROR when no
 *	conditions were given, nothing matched or the request failed.
 */
extern int sacctmgr_delete_qos(int argc, char *argv[])
{
	slurmdb_qos_cond_t *qos_cond = xmalloc(sizeof(slurmdb_qos_cond_t));
	List ret_list = NULL;
	int rc = SLURM_SUCCESS;
	int cond_cnt = 0;
	int i;

	for (i = 0; i < argc; i++) {
		int command_len = strlen(argv[i]);
		int is_keyword =
			!strncasecmp(argv[i], "Where", MAX(command_len, 5)) ||
			!strncasecmp(argv[i], "Set", MAX(command_len, 3));

		if (is_keyword)
			i++;
		cond_cnt += _set_cond(&i, argc, argv, qos_cond, NULL);
	}

	/* Refuse to run without conditions; -1 fails without a message */
	if ((cond_cnt == 0) || (cond_cnt == -1)) {
		if (cond_cnt == 0) {
			exit_code = 1;
			fprintf(stderr,
				" No conditions given to remove, not executing.\n");
		}
		slurmdb_destroy_qos_cond(qos_cond);
		return SLURM_ERROR;
	}

	/* _isdefault() below relies on g_qos_list being populated */
	if (!g_qos_list)
		g_qos_list = acct_storage_g_get_qos(db_conn, my_uid, NULL);

	notice_thread_init();
	ret_list = acct_storage_g_remove_qos(db_conn, my_uid, qos_cond);
	notice_thread_fini();
	slurmdb_destroy_qos_cond(qos_cond);

	if (!ret_list) {
		exit_code = 1;
		fprintf(stderr, " Error with request: %s\n",
			slurm_strerror(errno));
		rc = SLURM_ERROR;
	} else if (!list_count(ret_list)) {
		printf(" Nothing deleted\n");
		rc = SLURM_ERROR;
	} else if (_isdefault(ret_list)) {
		/* A QOS that is still the default of an association must
		 * not be removed: roll the request back. */
		exit_code = 1;
		fprintf(stderr, " Please either remove the qos' listed "
			"above from list and resubmit,\n"
			" or change the default qos to "
			"remove the qos.\n"
			" Changes Discarded\n");
		acct_storage_g_commit(db_conn, 0);
	} else {
		ListIterator itr = list_iterator_create(ret_list);
		char *qos_name;

		printf(" Deleting QOS(s)...\n");
		for (qos_name = list_next(itr); qos_name;
		     qos_name = list_next(itr))
			printf("  %s\n", qos_name);
		list_iterator_destroy(itr);

		if (commit_check("Would you like to commit changes?")) {
			acct_storage_g_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	}

	if (ret_list)
		list_destroy(ret_list);

	return rc;
}
Beispiel #8
0
/*
 * _admin_front_end - ask the user to confirm a state change of front end
 *	node(s) and, once confirmed, send the update request.
 * IN model, iter - only handed on to the recursive call at the end
 * IN type - requested action; names starting with "Drain" or "Resume"
 *	(case insensitive) are recognized, also used as the dialog title
 * IN node_list - name(s) of the front end node(s) to update
 *
 * Nothing is done but showing a popup when running in a federated view.
 */
static void _admin_front_end(GtkTreeModel *model, GtkTreeIter *iter, char *type,
			     char *node_list)
{
	uint16_t state = NO_VAL16;
	update_front_end_msg_t front_end_update_msg;
	char *new_type = NULL, *reason = NULL;
	char *lower;
	char *login;
	int rc;
	GtkWidget *label = NULL;
	GtkWidget *entry = NULL;
	GtkWidget *popup = NULL;

	if (cluster_flags & CLUSTER_FLAG_FED) {
		display_fed_disabled_popup(type);
		global_entry_changed = 0;
		return;
	}

	/* Initialize the message first so that every path reaching end_it
	 * can xfree() its reason field. */
	slurm_init_update_front_end_msg(&front_end_update_msg);

	popup = gtk_dialog_new_with_buttons(
		type,
		GTK_WINDOW(main_window),
		GTK_DIALOG_MODAL | GTK_DIALOG_DESTROY_WITH_PARENT,
		NULL);

	gtk_window_set_type_hint(GTK_WINDOW(popup),
				 GDK_WINDOW_TYPE_HINT_NORMAL);

	gtk_window_set_transient_for(GTK_WINDOW(popup), NULL);

	label = gtk_dialog_add_button(GTK_DIALOG(popup),
				      GTK_STOCK_YES, GTK_RESPONSE_OK);
	gtk_window_set_default(GTK_WINDOW(popup), label);
	gtk_dialog_add_button(GTK_DIALOG(popup),
			      GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL);

	if (!xstrncasecmp("Drain", type, 5)) {
		new_type = "DRAIN";
		reason = "\n\nPlease enter reason.";
		state = NODE_STATE_DRAIN;
		/* Draining requires a reason, so give the user a text entry */
		entry = create_entry();
	} else if (!xstrncasecmp("Resume", type, 6)) {
		new_type = "RESUME";
		reason = "";
		state = NODE_RESUME;
	} else {
		/* Unknown action: there is no state to ask about or to send,
		 * and new_type/reason would be NULL in the prompt below. */
		lower = g_strdup_printf("Unknown front end action: %s",
					type ? type : "(null)");
		display_edit_note(lower);
		g_free(lower);
		goto end_it;
	}

	/* Build the prompt on the heap so a long node list can not truncate
	 * the question; gtk_label_new() is given the complete text. */
	lower = g_strdup_printf(
		"Are you sure you want to set state of front end node %s "
		"to %s?%s", node_list, new_type, reason);
	label = gtk_label_new(lower);
	g_free(lower);

	gtk_box_pack_start(GTK_BOX(GTK_DIALOG(popup)->vbox),
			   label, false, false, 0);
	if (entry)
		gtk_box_pack_start(GTK_BOX(GTK_DIALOG(popup)->vbox),
				   entry, true, true, 0);
	gtk_widget_show_all(popup);
	rc = gtk_dialog_run (GTK_DIALOG(popup));

	if (rc == GTK_RESPONSE_OK) {
		front_end_update_msg.name = node_list;
		front_end_update_msg.node_state = state;
		if (entry) {
			front_end_update_msg.reason = xstrdup(
				gtk_entry_get_text(GTK_ENTRY(entry)));
			if (!front_end_update_msg.reason ||
			    !strlen(front_end_update_msg.reason)) {
				lower = g_strdup_printf(
					"You need a reason to do that.");
				display_edit_note(lower);
				g_free(lower);
				goto end_it;
			}
			/* getlogin() may return NULL (e.g. no controlling
			 * terminal); fall back to the real uid then, just as
			 * when the login name can not be resolved. */
			login = getlogin();
			rc = login ? uid_from_string(
					login,
					&front_end_update_msg.reason_uid) : -1;
			if (rc < 0)
				front_end_update_msg.reason_uid = getuid();
		}

		rc = slurm_update_front_end(&front_end_update_msg);
		if (rc == SLURM_SUCCESS) {
			lower = g_strdup_printf(
				"Nodes %s updated successfully.",
				node_list);
			display_edit_note(lower);
			g_free(lower);
		} else {
			lower = g_strdup_printf(
				"Problem updating nodes %s: %s",
				node_list, slurm_strerror(rc));
			display_edit_note(lower);
			g_free(lower);
		}
	}

end_it:
	global_entry_changed = 0;
	xfree(front_end_update_msg.reason);
	gtk_widget_destroy(popup);
	/* An edit signal that arrived in the meantime names the next action;
	 * take ownership of the string, handle it, then release it. */
	if (got_edit_signal) {
		type = got_edit_signal;
		got_edit_signal = NULL;
		_admin_front_end(model, iter, type, node_list);
		xfree(type);
	}
	return;
}
Beispiel #9
0
/*
 * get_info_front_end - refresh the front end page.
 * IN table - table the display widget is attached to; NULL together with a
 *	NULL display_data resets the page, NULL alone only copies the
 *	set_menu callback
 * IN display_data - display data to remember for later calls, may be NULL
 *
 * The display widget, the last loaded front end message and the current
 * view type are kept in static variables between calls.
 */
extern void get_info_front_end(GtkTable *table, display_data_t *display_data)
{
	int error_code = SLURM_SUCCESS;
	List info_list = NULL;
	static int view = -1;
	static front_end_info_msg_t *front_end_info_ptr = NULL;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	static GtkWidget *display_widget = NULL;
	int changed = 1, j;
	ListIterator itr = NULL;
	GtkTreePath *path = NULL;
	static bool set_opts = false;

	/* page options are only set up on the first call */
	if (!set_opts)
		set_page_opts(FRONT_END_PAGE, display_data_front_end,
			      SORTID_CNT, _initial_page_opts);
	set_opts = true;

	/* reset */
	if (!table && !display_data) {
		if (display_widget)
			gtk_widget_destroy(display_widget);
		display_widget = NULL;
		front_end_info_ptr = NULL;
		goto reset_curs;
	}

	if (display_data)
		local_display_data = display_data;
	if (!table) {
		display_data_front_end->set_menu = local_display_data->set_menu;
		goto reset_curs;
	}
	if (cluster_flags & CLUSTER_FLAG_FED) {
		/* replace whatever is shown by a fixed notice */
		view = ERROR_VIEW;
		if (display_widget)
			gtk_widget_destroy(display_widget);
		label = gtk_label_new("Not available in a federated view");
		gtk_table_attach_defaults(GTK_TABLE(table), label, 0, 1, 0, 1);
		gtk_widget_show(label);
		display_widget = gtk_widget_ref(label);
		goto end_it;
	}

	/* a toggle only needs the widget rebuilt, not the data reloaded */
	if (display_widget && toggled) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
		goto display_it;
	}

	error_code = get_new_info_front_end(&front_end_info_ptr, force_refresh);
	if (error_code == SLURM_NO_CHANGE_IN_DATA) {
		changed = 0;
	} else if (error_code != SLURM_SUCCESS) {
		/* the error label is already being displayed */
		if (view == ERROR_VIEW)
			goto end_it;
		if (display_widget)
			gtk_widget_destroy(display_widget);
		view = ERROR_VIEW;
		/* bounded write: error_char is a fixed 100 byte buffer */
		snprintf(error_char, sizeof(error_char),
			 "slurm_load_front_end: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(table, label, 0, 1, 0, 1);
		gtk_widget_show(label);
		display_widget = gtk_widget_ref(GTK_WIDGET(label));
		goto end_it;
	}

display_it:
	info_list = _create_front_end_info_list(front_end_info_ptr, changed);
	if (!info_list)
		goto reset_curs;
	/* set up the grid */
	if (display_widget && GTK_IS_TREE_VIEW(display_widget) &&
	    gtk_tree_selection_count_selected_rows(
		   gtk_tree_view_get_selection(
			   GTK_TREE_VIEW(display_widget)))) {
		GtkTreeViewColumn *focus_column = NULL;
		/* highlight the correct nodes from the last selection */
		gtk_tree_view_get_cursor(GTK_TREE_VIEW(display_widget),
					 &path, &focus_column);
	}
	if (!path) {
		/* nothing selected: color the grid from every record.
		 * node_inx holds start/end index pairs and is walked until
		 * a negative start index is found. */
		sview_front_end_info_t *fe_ptr;
		itr = list_iterator_create(info_list);
		while ((fe_ptr = list_next(itr))) {
			j = 0;
			while (fe_ptr->node_inx[j] >= 0) {
				change_grid_color(grid_button_list,
						  fe_ptr->node_inx[j],
						  fe_ptr->node_inx[j+1],
						  fe_ptr->color_inx,
						  true, 0);
				j += 2;
			}
		}
		list_iterator_destroy(itr);
		change_grid_color(grid_button_list, -1, -1,
				  MAKE_WHITE, true, 0);
	} else {
		highlight_grid(GTK_TREE_VIEW(display_widget),
			       SORTID_NODE_INX, SORTID_COLOR_INX,
			       grid_button_list);
		gtk_tree_path_free(path);
	}

	/* an error label can not be reused as the tree view */
	if (view == ERROR_VIEW && display_widget) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
	}
	if (!display_widget) {
		tree_view = create_treeview(local_display_data,
					    &grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		display_widget = gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, display_data_front_end,
				 SORTID_CNT, SORTID_NAME, SORTID_COLOR);
	}

	view = INFO_VIEW;
	_update_info_front_end(info_list, GTK_TREE_VIEW(display_widget));
end_it:
	toggled = false;
	force_refresh = false;
reset_curs:
	if (main_window && main_window->window)
		gdk_window_set_cursor(main_window->window, NULL);
	return;
}
Beispiel #10
0
extern int sacctmgr_list_txn(int argc, char *argv[])
{
	int rc = SLURM_SUCCESS;
	slurmdb_txn_cond_t *txn_cond = xmalloc(sizeof(slurmdb_txn_cond_t));
	List txn_list = NULL;
	slurmdb_txn_rec_t *txn = NULL;
	int i=0;
	ListIterator itr = NULL;
	ListIterator itr2 = NULL;
	int field_count = 0;

	print_field_t *field = NULL;

	List format_list = list_create(slurm_destroy_char);
	List print_fields_list; /* types are of print_field_t */

	for (i=0; i<argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp (argv[i], "Where", MAX(command_len, 5))
		    || !strncasecmp (argv[i], "Set", MAX(command_len, 3)))
			i++;
		_set_cond(&i, argc, argv, txn_cond, format_list);
	}

	if (exit_code) {
		slurmdb_destroy_txn_cond(txn_cond);
		list_destroy(format_list);
		return SLURM_ERROR;
	}

	if (!list_count(format_list)) {
		slurm_addto_char_list(format_list, "T,Action,Actor,Where,Info");
		if (txn_cond->with_assoc_info)
			slurm_addto_char_list(format_list,
					      "User,Account,Cluster");
	}

	print_fields_list = sacctmgr_process_format_list(format_list);
	list_destroy(format_list);

	if (exit_code) {
		list_destroy(print_fields_list);
		return SLURM_ERROR;
	}

	txn_list = acct_storage_g_get_txn(db_conn, my_uid, txn_cond);
	slurmdb_destroy_txn_cond(txn_cond);

	if (!txn_list) {
		exit_code=1;
		fprintf(stderr, " Error with request: %s\n",
			slurm_strerror(errno));
		list_destroy(print_fields_list);
		return SLURM_ERROR;
	}
	itr = list_iterator_create(txn_list);
	itr2 = list_iterator_create(print_fields_list);
	print_fields_header(print_fields_list);

	field_count = list_count(print_fields_list);

	while((txn = list_next(itr))) {
		int curr_inx = 1;
		while((field = list_next(itr2))) {
			switch(field->type) {
			case PRINT_ACCT:
				field->print_routine(field, txn->accts,
						     (curr_inx == field_count));
				break;
			case PRINT_ACTIONRAW:
				field->print_routine(
					field,
					txn->action,
					(curr_inx == field_count));
				break;
			case PRINT_ACTION:
				field->print_routine(
					field,
					slurmdbd_msg_type_2_str(txn->action,
								0),
					(curr_inx == field_count));
				break;
			case PRINT_ACTOR:
				field->print_routine(field,
						     txn->actor_name,
						     (curr_inx == field_count));
				break;
			case PRINT_CLUSTER:
				field->print_routine(field, txn->clusters,
						     (curr_inx == field_count));
				break;
			case PRINT_ID:
				field->print_routine(field,
						     txn->id,
						     (curr_inx == field_count));
				break;
			case PRINT_INFO:
				field->print_routine(field,
						     txn->set_info,
						     (curr_inx == field_count));
				break;
			case PRINT_TS:
				field->print_routine(field,
						     txn->timestamp,
						     (curr_inx == field_count));
				break;
			case PRINT_USER:
				field->print_routine(field, txn->users,
						     (curr_inx == field_count));
				break;
			case PRINT_WHERE:
				field->print_routine(field,
						     txn->where_query,
						     (curr_inx == field_count));
				break;
			default:
				field->print_routine(field, NULL,
						     (curr_inx == field_count));
					break;
			}
			curr_inx++;
		}
		list_iterator_reset(itr2);
		printf("\n");
	}

	list_iterator_destroy(itr2);
	list_iterator_destroy(itr);
	list_destroy(txn_list);
	list_destroy(print_fields_list);
	return rc;
}
Beispiel #11
0
/*
 * slurm_job_step_get_pids - gather the pids of a job step from its nodes
 *
 * IN job_id - id of the job
 * IN step_id - id of the step within the job
 * IN node_list - nodes to ask; when NULL the node list of the step's layout
 *	is used instead
 * IN/OUT resp - if *resp is NULL a response message is allocated here and
 *	stored in *resp (and released again, with *resp reset to NULL, when
 *	no reply list comes back); otherwise the given message is filled in
 * RET SLURM_SUCCESS on success, an error code otherwise
 */
extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id,
				   char *node_list,
				   job_step_pids_response_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	bool allocated_here = false;
	slurm_msg_t request;
	job_step_id_msg_t step_req;
	slurm_step_layout_t *layout = NULL;
	job_step_pids_response_msg_t *out;
	List replies = NULL;
	ListIterator reply_itr;
	ret_data_info_t *reply = NULL;

	xassert(resp);

	/* No explicit node list: ask every node of the step */
	if (!node_list) {
		layout = slurm_job_step_layout_get(job_id, step_id);
		if (!layout) {
			rc = errno;
			error("slurm_job_step_get_pids: "
			      "problem getting step_layout for %u.%u: %s",
			      job_id, step_id, slurm_strerror(rc));
			return rc;
		}
		node_list = layout->node_list;
	}

	/* Use the caller's message if there is one, else create our own */
	out = *resp;
	if (!out) {
		out = xmalloc(sizeof(job_step_pids_response_msg_t));
		*resp = out;
		allocated_here = true;
	}

	debug("slurm_job_step_get_pids: "
	      "getting pid information of job %u.%u on nodes %s",
	      job_id, step_id, node_list);

	/* Build the request; the ids are mirrored into the response */
	slurm_msg_t_init(&request);
	memset(&step_req, 0, sizeof(job_step_id_msg_t));
	step_req.job_id = job_id;
	step_req.step_id = step_id;
	out->job_id = job_id;
	out->step_id = step_id;
	request.msg_type = REQUEST_JOB_STEP_PIDS;
	request.data = &step_req;

	replies = slurm_send_recv_msgs(node_list, &request, 0, false);
	if (!replies) {
		error("slurm_job_step_get_pids: got an error no list returned");
		rc = SLURM_ERROR;
		if (allocated_here) {
			slurm_job_step_pids_response_msg_free(out);
			*resp = NULL;
		}
		goto cleanup;
	}

	reply_itr = list_iterator_create(replies);
	while ((reply = list_next(reply_itr))) {
		if (reply->type == RESPONSE_JOB_STEP_PIDS) {
			if (!out->pid_list)
				out->pid_list = list_create(
					slurm_free_job_step_pids);
			/* the pid record now belongs to the response list */
			list_push(out->pid_list, reply->data);
			reply->data = NULL;
			continue;
		}

		/* Everything else is a failure of some kind; the return
		 * code of the most recent one is what gets reported. */
		rc = slurm_get_return_code(reply->type, reply->data);
		if (reply->type == RESPONSE_SLURM_RC) {
			error("slurm_job_step_get_pids: "
			      "there was an error with the "
			      "list pid request rc = %s",
			      slurm_strerror(rc));
		} else {
			error("slurm_job_step_get_pids: "
			      "unknown return given %d rc = %s",
			      reply->type, slurm_strerror(rc));
		}
	}
	list_iterator_destroy(reply_itr);
	list_destroy(replies);

	if (out->pid_list)
		list_sort(out->pid_list, (ListCmpF)_sort_pids_by_name);

cleanup:
	slurm_step_layout_destroy(layout);

	return rc;
}
Beispiel #12
0
/*
 * _handle_checkpoint_tasks - read a checkpoint request from fd, pass it to
 *	the checkpoint plugin and write the resulting return code back.
 * IN fd - descriptor the request (timestamp, image dir length, image dir)
 *	is read from and the int return code is written to
 * IN job - step the request is for
 * IN uid - user making the request; must own the step or be authorized
 * RET SLURM_SUCCESS once a return code was written to fd (the code itself
 *	may still report a failure), SLURM_FAILURE on a read/write error or
 *	a malformed length
 */
static int
_handle_checkpoint_tasks(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	time_t timestamp;
	int len;
	char *image_dir = NULL;

	debug3("_handle_checkpoint_tasks for job %u.%u",
	       job->jobid, job->stepid);

	safe_read(fd, &timestamp, sizeof(time_t));
	safe_read(fd, &len, sizeof(int));
	/* The length comes from the peer; a negative value must never
	 * reach xmalloc() as a size. */
	if (len < 0)
		goto rwfail;
	if (len) {
		image_dir = xmalloc (len);
		safe_read(fd, image_dir, len); /* '\0' terminated */
	}

	debug3("  uid = %d", uid);
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("checkpoint req from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = EPERM;
		goto done;
	}

	/* The same timestamp seen twice means a repeated request */
	if (job->ckpt_timestamp &&
	    timestamp == job->ckpt_timestamp) {
		debug("duplicate checkpoint req for job %u.%u, "
		      "timestamp %ld. discarded.",
		      job->jobid, job->stepid, (long)timestamp);
		rc = ESLURM_ALREADY_DONE; /* EINPROGRESS? */
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->pgid <= (pid_t)1) {
		debug ("step %u.%u invalid [jmgr_pid:%d pgid:%u]",
		       job->jobid, job->stepid, job->jmgr_pid, job->pgid);
		rc = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	/* suspend_mutex is held from the suspended check until the plugin
	 * call has returned */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	/* set timestamp in case another request comes */
	job->ckpt_timestamp = timestamp;

	/* TODO: do we need job->ckpt_dir any more,
	 *	except for checkpoint/xlch? */

	/* call the plugin to send the request */
	if (checkpoint_signal_tasks(job, image_dir) != SLURM_SUCCESS) {
		rc = -1;
		verbose("Error sending checkpoint request to %u.%u: %s",
			job->jobid, job->stepid, slurm_strerror(rc));
	} else {
		verbose("Sent checkpoint request to %u.%u",
			job->jobid, job->stepid);
	}

	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));
	xfree(image_dir);
	return SLURM_SUCCESS;
rwfail:
	/* reached from the safe_read/safe_write macros; image_dir may
	 * already have been allocated */
	xfree(image_dir);
	return SLURM_FAILURE;
}
Beispiel #13
0
/*
 * Terminate an option value in place and return where it starts.
 * A value opening with a double or single quote begins after that quote and
 * ends at the matching quote (or at the end of the string if there is
 * none); any other value is handed to null_term().
 */
static char *_wiki_quoted_value(char *value)
{
	char quote = value[0];
	int i;

	if ((quote != '\"') && (quote != '\'')) {
		null_term(value);
		return value;
	}

	value++;
	for (i = 0; value[i] != '\0'; i++) {
		if (value[i] == quote) {
			value[i] = '\0';
			break;
		}
	}
	return value;
}

/* Modify a job:
 *	CMD=MODIFYJOB ARG=<jobid>
 *		[BANK=<name>;]
 *		[COMMENT=<whatever>;]
 *		[DEPEND=afterany:<jobid>;]
 *		[HOSTLIST=<hosts>;]
 *		[JOBNAME=<name>;]
 *		[MINSTARTTIME=<uts>;]
 *		[NODES=<number>;]
 *		[PARTITION=<name>;]
 *		[RFEATURES=<features>;]
 *		[TIMELIMIT=<seconds>;]
 *		[VARIABLELIST=<env_vars>;]
 *		[GRES=<name:value>;]
 *		[WCKEY=<name>;]
 *
 * IN/OUT cmd_ptr - command string; it is modified in place (parsed "="
 *	become ":" and option values get NUL terminated)
 * OUT err_code - set to -300 for a missing/invalid ARG and to -700 if the
 *	modification fails; left untouched on success
 * OUT err_msg - message describing the outcome; points at a string literal,
 *	at slurm_strerror() output or at a static buffer of this function,
 *	so it must not be freed
 * RET 0 on success, -1 on failure */
extern int	job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *bank_ptr, *depend_ptr, *nodes_ptr, *start_ptr;
	char *host_ptr, *name_ptr, *part_ptr, *time_ptr, *tmp_char;
	char *comment_ptr, *feature_ptr, *env_ptr, *gres_ptr, *wckey_ptr;
	int slurm_rc;
	uint32_t jobid, new_node_cnt = 0, new_time_limit = 0;
	static char reply_msg[128];
	/* Locks: write job, read node and partition info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "MODIFYJOB lacks ARG=";
		error("wiki: MODIFYJOB lacks ARG=");
		return -1;
	}
	/* Change all parsed "=" to ":" then search for remaining "="
	 * and report results as unrecognized options */
	arg_ptr[3] = ':';
	arg_ptr += 4;
	jobid = strtoul(arg_ptr, &tmp_char, 10);
	/* <ctype.h> functions need an unsigned char value */
	if ((tmp_char[0] != '\0') &&
	    (!isspace((unsigned char) tmp_char[0]))) {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: MODIFYJOB has invalid jobid");
		return -1;
	}

	/* Locate every option before any value gets NUL terminated, as a
	 * terminator would hide the options following it from strstr() */
	bank_ptr    = strstr(cmd_ptr, "BANK=");
	comment_ptr = strstr(cmd_ptr, "COMMENT=");
	depend_ptr  = strstr(cmd_ptr, "DEPEND=");
	gres_ptr    = strstr(cmd_ptr, "GRES=");
	host_ptr    = strstr(cmd_ptr, "HOSTLIST=");
	name_ptr    = strstr(cmd_ptr, "JOBNAME=");
	start_ptr   = strstr(cmd_ptr, "MINSTARTTIME=");
	nodes_ptr   = strstr(cmd_ptr, "NODES=");
	part_ptr    = strstr(cmd_ptr, "PARTITION=");
	feature_ptr = strstr(cmd_ptr, "RFEATURES=");
	time_ptr    = strstr(cmd_ptr, "TIMELIMIT=");
	env_ptr     = strstr(cmd_ptr, "VARIABLELIST=");
	wckey_ptr   = strstr(cmd_ptr, "WCKEY=");
	if (bank_ptr) {
		bank_ptr[4] = ':';
		bank_ptr += 5;
		null_term(bank_ptr);
	}
	if (comment_ptr) {
		comment_ptr[7] = ':';
		comment_ptr = _wiki_quoted_value(comment_ptr + 8);
	}
	if (depend_ptr) {
		depend_ptr[6] = ':';
		depend_ptr += 7;
		null_term(depend_ptr);
	}
	if (feature_ptr) {
		feature_ptr[9] = ':';
		feature_ptr += 10;
		null_term(feature_ptr);
	}
	if (gres_ptr) {
		gres_ptr[4] = ':';
		gres_ptr += 5;
		null_term(gres_ptr);
	}
	if (host_ptr) {
		host_ptr[8] = ':';
		host_ptr += 9;
		null_term(host_ptr);
	}
	if (name_ptr) {
		name_ptr[7] = ':';
		name_ptr = _wiki_quoted_value(name_ptr + 8);
	}
	if (start_ptr) {
		start_ptr[12] = ':';
		start_ptr += 13;
		null_term(start_ptr);
	}
	if (nodes_ptr) {
		nodes_ptr[5] = ':';
		nodes_ptr += 6;
		new_node_cnt = strtoul(nodes_ptr, NULL, 10);
	}
	if (part_ptr) {
		part_ptr[9] = ':';
		part_ptr += 10;
		null_term(part_ptr);
	}
	if (time_ptr) {
		time_ptr[9] = ':';
		time_ptr += 10;
		new_time_limit = strtoul(time_ptr, NULL, 10);
	}
	if (env_ptr) {
		env_ptr[12] = ':';
		env_ptr += 13;
		null_term(env_ptr);
	}
	if (wckey_ptr) {
		wckey_ptr[5] = ':';
		wckey_ptr += 6;
		null_term(wckey_ptr);
	}

	/* Look for any un-parsed "=" ignoring anything after VARIABLELIST
	 * which is expected to contain "=" in its value*/
	tmp_char = strchr(cmd_ptr, '=');
	if (tmp_char && (!env_ptr || (env_ptr > tmp_char))) {
		tmp_char[0] = '\0';
		/* Walk back to the start of the option name, but never
		 * in front of the command string itself */
		while ((tmp_char > cmd_ptr) && tmp_char[-1] &&
		       (!isspace((unsigned char) tmp_char[-1])))
			tmp_char--;
		/* only reported, the recognized options are still applied */
		error("wiki: Invalid MODIFYJOB option %s", tmp_char);
	}

	lock_slurmctld(job_write_lock);
	slurm_rc = _job_modify(jobid, bank_ptr, depend_ptr, host_ptr,
			new_node_cnt, part_ptr, new_time_limit, name_ptr,
			start_ptr, feature_ptr, env_ptr, comment_ptr,
			gres_ptr, wckey_ptr);
	unlock_slurmctld(job_write_lock);
	if (slurm_rc != SLURM_SUCCESS) {
		*err_code = -700;
		*err_msg = slurm_strerror(slurm_rc);
		error("wiki: Failed to modify job %u (%m)", jobid);
		return -1;
	}

	snprintf(reply_msg, sizeof(reply_msg),
		"job %u modified successfully", jobid);
	*err_msg = reply_msg;
	return 0;
}
Beispiel #14
0
/*
 * sacctmgr_archive_load - load an archive file (or run an insert statement)
 *	into the accounting storage.
 * IN argc, argv - options: "File=<path>" (an argument without an option
 *	name is taken as the file as well) and "Insert=<statement>"
 * RET SLURM_SUCCESS if the load worked, SLURM_ERROR otherwise
 *
 * The file is looked up relative to the current working directory and must
 * exist on the calling host. After a successful load the user is asked
 * whether the changes should be committed.
 */
extern int sacctmgr_archive_load(int argc, char *argv[])
{
	int rc = SLURM_SUCCESS;
	slurmdb_archive_rec_t *arch_rec =
		xmalloc(sizeof(slurmdb_archive_rec_t));
	int i=0, command_len = 0;
	struct stat st;

	for (i=0; i<argc; i++) {
		/* end is the offset of the value, 0 if there is no option
		 * name in front of it */
		int end = parse_option_end(argv[i]);
		if (!end)
			command_len=strlen(argv[i]);
		else {
			command_len=end-1;
			if (argv[i][end] == '=') {
				end++;
			}
		}

		if (!end
		   || !strncasecmp (argv[i], "File", MAX(command_len, 1))) {
			/* drop the value of an earlier File option */
			xfree(arch_rec->archive_file);
			arch_rec->archive_file =
				strip_quotes(argv[i]+end, NULL, 0);
		} else if (!strncasecmp (argv[i], "Insert",
					 MAX(command_len, 2))) {
			/* drop the value of an earlier Insert option */
			xfree(arch_rec->insert);
			arch_rec->insert = strip_quotes(argv[i]+end, NULL, 1);
		} else {
			exit_code=1;
			fprintf(stderr, " Unknown option: %s\n", argv[i]);
		}
	}

	if (exit_code) {
		slurmdb_destroy_archive_rec(arch_rec);
		return SLURM_ERROR;
	}

	if (arch_rec->archive_file) {
		char *fullpath;
		char cwd[MAXPATHLEN + 1];
		int  mode = R_OK;

		if ((getcwd(cwd, MAXPATHLEN)) == NULL)
			fatal("getcwd failed: %m");

		/* prefer the resolved path if the search finds one */
		if ((fullpath = search_path(cwd, arch_rec->archive_file,
					    true, mode))) {
			xfree(arch_rec->archive_file);
			arch_rec->archive_file = fullpath;
		}

		if (stat(arch_rec->archive_file, &st) < 0) {
			exit_code = errno;
			fprintf(stderr, " load: Failed to stat %s: %m\n "
				"Note: For archive load, the file must be on "
				"the calling host.\n",
				arch_rec->archive_file);
			/* the record is still owned here, do not leak it */
			slurmdb_destroy_archive_rec(arch_rec);
			return SLURM_ERROR;
		}
	}

	rc = jobacct_storage_g_archive_load(db_conn, arch_rec);
	if (rc == SLURM_SUCCESS) {
		if (commit_check("Would you like to commit changes?")) {
			acct_storage_g_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	} else {
		exit_code=1;
		fprintf(stderr, " Problem loading archive file: %s\n",
			slurm_strerror(rc));
		rc = SLURM_ERROR;
	}

	slurmdb_destroy_archive_rec(arch_rec);

	return rc;
}
Beispiel #15
0
/*
 * get_job - load the current job information and print it, either into the
 *	curses text window or, with params.commandline set, to stdout.
 *
 * The job message of the previous call is kept in static storage so that
 * only changes since its last_update have to be loaded. Jobs with nodes
 * assigned are printed first (and marked on the grid), pending jobs follow
 * with "waiting..." in place of their node list.
 */
extern void get_job(void)
{
	int error_code = -1, i, recs;
	static int printed_jobs = 0;
	static int count = 0;
	static job_info_msg_t *job_info_ptr = NULL, *new_job_ptr = NULL;
	job_info_t *job_ptr = NULL;
	uint16_t show_flags = 0;
	bitstr_t *nodes_req = NULL;
	static uint16_t last_flags = 0;

	if (params.all_flag)
		show_flags |= SHOW_ALL;
	if (job_info_ptr) {
		/* different flags invalidate the cached data */
		if (show_flags != last_flags)
			job_info_ptr->last_update = 0;
		error_code = slurm_load_jobs(job_info_ptr->last_update,
					     &new_job_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_info_msg(job_info_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* nothing new, keep showing the old message */
			error_code = SLURM_SUCCESS;
			new_job_ptr = job_info_ptr;
		}
	} else
		/* first load: an update time of 0 asks for everything */
		error_code = slurm_load_jobs((time_t) 0, &new_job_ptr,
					     show_flags);

	last_flags = show_flags;
	if (error_code) {
		if (quiet_flag != 1) {
			if (!params.commandline) {
				mvwprintw(text_win,
					  main_ycord, 1,
					  "slurm_load_jobs: %s",
					  slurm_strerror(slurm_get_errno()));
				main_ycord++;
			} else {
				printf("slurm_load_jobs: %s\n",
				       slurm_strerror(slurm_get_errno()));
			}
		}
	}

	if (!params.no_header)
		_print_header_job();

	if (new_job_ptr)
		recs = new_job_ptr->record_count;
	else
		recs = 0;

	/* printed_jobs and count still hold the values of the last call */
	if (!params.commandline)
		if ((text_line_cnt+printed_jobs) > count)
			text_line_cnt--;
	printed_jobs = 0;
	count = 0;

	if (params.hl)
		nodes_req = get_requested_node_bitmap();

	/* First pass: jobs that have nodes assigned */
	for (i = 0; i < recs; i++) {
		job_ptr = &(new_job_ptr->job_array[i]);
		if (!IS_JOB_PENDING(job_ptr)   && !IS_JOB_RUNNING(job_ptr) &&
		    !IS_JOB_SUSPENDED(job_ptr) && !IS_JOB_COMPLETING(job_ptr))
			continue;	/* job has completed */
		if (nodes_req) {
			/* skip jobs not touching any requested node */
			int overlap = 0;
			bitstr_t *loc_bitmap = bit_alloc(bit_size(nodes_req));
			inx2bitstr(loc_bitmap, job_ptr->node_inx);
			overlap = bit_overlap(loc_bitmap, nodes_req);
			FREE_NULL_BITMAP(loc_bitmap);
			if (!overlap)
				continue;
		}

		if (job_ptr->node_inx[0] != -1) {
			int j = 0;
			/* node_inx holds start/end index pairs and is walked
			 * until a negative start index is found; count the
			 * nodes and mark each range on the grid */
			job_ptr->num_nodes = 0;
			while (job_ptr->node_inx[j] >= 0) {
				job_ptr->num_nodes +=
					(job_ptr->node_inx[j + 1] + 1) -
					 job_ptr->node_inx[j];
				set_grid_inx(job_ptr->node_inx[j],
					     job_ptr->node_inx[j + 1], count);
				j += 2;
			}

			/* num_cpus is reused to carry the letter that
			 * identifies this job in the output */
			if (!params.commandline) {
				if ((count >= text_line_cnt) &&
				    (printed_jobs < (getmaxy(text_win) - 4))) {
					job_ptr->num_cpus =
						(int)letters[count%62];
					wattron(text_win,
						COLOR_PAIR(colors[count%6]));
					_print_text_job(job_ptr);
					wattroff(text_win,
						 COLOR_PAIR(colors[count%6]));
					printed_jobs++;
				}
			} else {
				job_ptr->num_cpus = (int)letters[count%62];
				_print_text_job(job_ptr);
			}
			count++;
		}
		if (count == 128)
			count = 0;
	}

	/* Second pass: pending jobs */
	for (i = 0; i < recs; i++) {
		job_ptr = &(new_job_ptr->job_array[i]);

		if (!IS_JOB_PENDING(job_ptr))
			continue;	/* only pending jobs are listed here */

		if (!params.commandline) {
			if ((count>=text_line_cnt) &&
			    (printed_jobs < (getmaxy(text_win) - 4))) {
				xfree(job_ptr->nodes);
				job_ptr->nodes = xstrdup("waiting...");
				job_ptr->num_cpus = (int) letters[count%62];
				wattron(text_win,
					COLOR_PAIR(colors[count%6]));
				_print_text_job(job_ptr);
				wattroff(text_win,
					 COLOR_PAIR(colors[count%6]));
				printed_jobs++;
			}
		} else {
			xfree(job_ptr->nodes);
			job_ptr->nodes = xstrdup("waiting...");
			job_ptr->num_cpus = (int) letters[count%62];
			_print_text_job(job_ptr);
			printed_jobs++;
		}
		count++;

		if (count == 128)
			count = 0;
	}

	if (params.commandline && params.iterate)
		printf("\n");

	if (!params.commandline)
		main_ycord++;

	/* remember the message for the next call */
	job_info_ptr = new_job_ptr;
	return;
}
Beispiel #16
0
/*
 * get_info_bb - refresh the burst buffer page.
 * IN table - table the display widget is attached to; NULL together with a
 *	NULL display_data resets the page, NULL alone only copies the
 *	set_menu callback
 * IN display_data - display data to remember for later calls, may be NULL
 *
 * The display widget, the last loaded burst buffer message and the current
 * view type are kept in static variables between calls.
 */
extern void get_info_bb(GtkTable *table, display_data_t *display_data)
{
	int error_code = SLURM_SUCCESS;
	List info_list = NULL;
	static int view = -1;
	static burst_buffer_info_msg_t *bb_info_ptr = NULL;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	static GtkWidget *display_widget = NULL;
	GtkTreePath *path = NULL;
	static bool set_opts = FALSE;

	/* page options are only set up on the first call */
	if (!set_opts) {
		set_page_opts(BB_PAGE, display_data_bb,
			      SORTID_CNT, _initial_page_opts);
	}
	set_opts = TRUE;

	/* reset */
	if (!table && !display_data) {
		if (display_widget)
			gtk_widget_destroy(display_widget);
		display_widget = NULL;
		bb_info_ptr = NULL;
		goto reset_curs;
	}

	if (display_data)
		local_display_data = display_data;
	if (!table) {
		display_data_bb->set_menu = local_display_data->set_menu;
		goto reset_curs;
	}
	/* a toggle only needs the widget rebuilt, not the data reloaded */
	if (display_widget && toggled) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
		goto display_it;
	}

	error_code = get_new_info_bb(&bb_info_ptr, force_refresh);

	if (error_code == SLURM_NO_CHANGE_IN_DATA) {
		/* unchanged data is displayed like freshly loaded data */
	} else if (error_code != SLURM_SUCCESS) {
		/* the error label is already being displayed */
		if (view == ERROR_VIEW)
			goto end_it;
		if (display_widget)
			gtk_widget_destroy(display_widget);
		view = ERROR_VIEW;
		/* bounded write: error_char is a fixed 100 byte buffer.
		 * NOTE(review): the text names slurm_load_reservations on
		 * the burst buffer page - looks copied, confirm. */
		snprintf(error_char, sizeof(error_char),
			 "slurm_load_reservations: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(table, label, 0, 1, 0, 1);
		gtk_widget_show(label);
		display_widget = gtk_widget_ref(GTK_WIDGET(label));
		goto end_it;
	}

display_it:
	info_list = _create_bb_info_list(bb_info_ptr);

	if (!info_list) {
		goto reset_curs;
	}

	/* set up the grid */
	if (display_widget && GTK_IS_TREE_VIEW(display_widget)
	    && gtk_tree_selection_count_selected_rows(
		    gtk_tree_view_get_selection(
			    GTK_TREE_VIEW(display_widget)))) {
		GtkTreeViewColumn *focus_column = NULL;
		/* highlight the correct nodes from the last selection */
		gtk_tree_view_get_cursor(GTK_TREE_VIEW(display_widget),
					 &path, &focus_column);
		/* The path is not used any further on this page, but the
		 * copy handed back by GTK is ours to free. */
		if (path) {
			gtk_tree_path_free(path);
			path = NULL;
		}
	}

	change_grid_color(grid_button_list, -1, -1,
			  MAKE_WHITE, true, 0);

	/* an error label can not be reused as the tree view */
	if (view == ERROR_VIEW && display_widget) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
	}
	if (!display_widget) {
		tree_view = create_treeview(local_display_data,
					    &grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		display_widget = gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, display_data_bb,
				 SORTID_CNT, SORTID_NAME, SORTID_COLOR);

	}

	view = INFO_VIEW;
	_update_info_bb(info_list, GTK_TREE_VIEW(display_widget));
end_it:
	toggled = FALSE;
	force_refresh = FALSE;
reset_curs:
	if (main_window && main_window->window)
		gdk_window_set_cursor(main_window->window, NULL);

	return;
}
Beispiel #17
0
extern int sacctmgr_delete_account(int argc, char **argv)
{
	int rc = SLURM_SUCCESS;
	slurmdb_account_cond_t *acct_cond =
		xmalloc(sizeof(slurmdb_account_cond_t));
	int i = 0;
	List ret_list = NULL, local_assoc_list = NULL;
	ListIterator itr = NULL;
	int cond_set = 0, prev_set = 0;

	for (i = 0; i < argc; i++) {
		int command_len = strlen(argv[i]);
		if (!xstrncasecmp(argv[i], "Where", MAX(command_len, 5))
		    || !xstrncasecmp(argv[i], "Set", MAX(command_len, 3)))
			i++;
		prev_set = _set_cond(&i, argc, argv, acct_cond, NULL);
		cond_set |= prev_set;
	}

	if (!cond_set) {
		exit_code = 1;
		fprintf(stderr,
			" No conditions given to remove, not executing.\n");
		slurmdb_destroy_account_cond(acct_cond);
		return SLURM_ERROR;
	}

	if (exit_code) {
		slurmdb_destroy_account_cond(acct_cond);
		return SLURM_ERROR;
	}

	if (!acct_cond->assoc_cond) {
		error("%s: Association condition is NULL", __func__);
		slurmdb_destroy_account_cond(acct_cond);
		return SLURM_ERROR;
	}

	/* check to see if person is trying to remove root account.  This is
	 * bad, and should not be allowed outside of deleting a cluster.
	 */
	if (acct_cond->assoc_cond
	   && acct_cond->assoc_cond->acct_list
	   && list_count(acct_cond->assoc_cond->acct_list)) {
		char *tmp_char = NULL;
		itr = list_iterator_create(acct_cond->assoc_cond->acct_list);
		while ((tmp_char = list_next(itr))) {
			if (!xstrcasecmp(tmp_char, "root"))
				break;
		}
		list_iterator_destroy(itr);
		if (tmp_char) {
			exit_code=1;
			fprintf(stderr, " You are not allowed to remove "
				"the root account.\n"
				" Use remove cluster instead.\n");
			slurmdb_destroy_account_cond(acct_cond);
			return SLURM_ERROR;
		}
	}

	acct_cond->assoc_cond->only_defs = 1;
	local_assoc_list = slurmdb_associations_get(
		db_conn, acct_cond->assoc_cond);
	acct_cond->assoc_cond->only_defs = 0;

	notice_thread_init();
	if (cond_set == 1) {
		ret_list = slurmdb_accounts_remove(
			db_conn, acct_cond);
	} else if (cond_set & 2) {
		ret_list = slurmdb_associations_remove(
			db_conn, acct_cond->assoc_cond);
	}
	rc = errno;
	notice_thread_fini();
	slurmdb_destroy_account_cond(acct_cond);

	if (ret_list && list_count(ret_list)) {
		char *object = NULL;
		ListIterator itr = NULL;

		/* Check to see if person is trying to remove a default
		 * account of a user.  _isdefault only works with the
		 * output from slurmdb_accounts_remove, and
		 * with a previously got assoc_list.
		 */
		if (_isdefault(cond_set, ret_list, local_assoc_list)) {
			exit_code=1;
			fprintf(stderr, " Please either remove the "
				"accounts listed "
				"above from list and resubmit,\n"
				" or change these users default account to "
				"remove the account(s).\n"
				" Changes Discarded\n");
			slurmdb_connection_commit(db_conn, 0);
			goto end_it;
		}
		itr = list_iterator_create(ret_list);
		/* If there were jobs running with an association to
		   be deleted, don't.
		*/
		if (rc == ESLURM_JOBS_RUNNING_ON_ASSOC) {
			fprintf(stderr, " Error with request: %s\n",
				slurm_strerror(rc));
			while((object = list_next(itr))) {
				fprintf(stderr,"  %s\n", object);
			}
			slurmdb_connection_commit(db_conn, 0);
			goto end_it;
		}

		if (cond_set == 1) {
			printf(" Deleting accounts...\n");
		} else if (cond_set & 2) {
			printf(" Deleting account associations...\n");
		}
		while((object = list_next(itr))) {
			printf("  %s\n", object);
		}
		list_iterator_destroy(itr);
		if (commit_check("Would you like to commit changes?")) {
			slurmdb_connection_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			slurmdb_connection_commit(db_conn, 0);
		}
	} else if (ret_list) {
		printf(" Nothing deleted\n");
		rc = SLURM_ERROR;
	} else {
		exit_code=1;
		fprintf(stderr, " Error with request: %s\n",
			slurm_strerror(errno));

		rc = SLURM_ERROR;
	}

end_it:

	FREE_NULL_LIST(ret_list);
	FREE_NULL_LIST(local_assoc_list);

	return rc;
}
Beispiel #18
0
/*
 * specific_info_bb - fill in (or refresh) a popup window with full
 *	information about burst buffers.
 * IN/OUT popup_win - popup being populated.  Its spec_info->display_widget
 *	and spec_info->view are updated here, and its toggled/force_refresh
 *	flags are cleared on every exit that passes through end_it.
 */
extern void specific_info_bb(popup_info_t *popup_win)
{
	int bb_error_code = SLURM_SUCCESS;
	static burst_buffer_info_msg_t *bb_info_ptr = NULL;
	specific_info_t *spec_info = popup_win->spec_info;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	List bb_list = NULL;
	List send_bb_list = NULL;
	sview_bb_info_t *sview_bb_info_ptr = NULL;
	ListIterator itr = NULL;

	if (!spec_info->display_widget) {
		setup_popup_info(popup_win, display_data_bb, SORTID_CNT);
	}

	/* The user toggled the view: drop the old widget and rebuild it
	 * from the data we already hold. */
	if (spec_info->display_widget && popup_win->toggled) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
		goto display_it;
	}

	if ((bb_error_code =
	     get_new_info_bb(&bb_info_ptr, popup_win->force_refresh))
	    == SLURM_NO_CHANGE_IN_DATA) {
		if (!spec_info->display_widget || spec_info->view == ERROR_VIEW)
			goto display_it;
	} else if (bb_error_code != SLURM_SUCCESS) {
		/* Already showing the error label, nothing more to do */
		if (spec_info->view == ERROR_VIEW)
			goto end_it;
		spec_info->view = ERROR_VIEW;
		if (spec_info->display_widget)
			gtk_widget_destroy(spec_info->display_widget);
		/* Bounded write: the error text has no fixed maximum length
		 * and error_char is a small stack buffer. */
		snprintf(error_char, sizeof(error_char), "get_new_info_bb: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(popup_win->table,
					  label,
					  0, 1, 0, 1);
		gtk_widget_show(label);
		spec_info->display_widget = gtk_widget_ref(label);
		goto end_it;
	}

display_it:

	bb_list = _create_bb_info_list(bb_info_ptr);

	/* Note: this exit does not reset toggled/force_refresh */
	if (!bb_list)
		return;

	if (spec_info->view == ERROR_VIEW && spec_info->display_widget) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
	}
	if (spec_info->type != INFO_PAGE && !spec_info->display_widget) {
		tree_view = create_treeview(local_display_data,
					    &popup_win->grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		spec_info->display_widget =
			gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(popup_win->table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		 * to the treestore we don't really care about
		 * the return value */
		create_treestore(tree_view, popup_win->display_data,
				 SORTID_CNT, SORTID_NAME, SORTID_COLOR);
	}

	setup_popup_grid_list(popup_win);

	spec_info->view = INFO_VIEW;
	if (spec_info->type == INFO_PAGE) {
		_display_info_bb(bb_list, popup_win);
		goto end_it;
	}

	/* just linking to another list, don't free the inside, just the list */
	send_bb_list = list_create(NULL);
	itr = list_iterator_create(bb_list);
	/* Set up additional menu options(ie the right click menu stuff) */
	while ((sview_bb_info_ptr = list_next(itr))) {
		/* Since we will not use any of these pages we will
		 * leave them blank.  Every label below reaches the default
		 * branch, which continues, so as written the list_push()
		 * after the switch is never executed and send_bb_list
		 * stays empty. */
		switch(spec_info->type) {
		case PART_PAGE:
		case BLOCK_PAGE:
		case NODE_PAGE:
		case JOB_PAGE:
		case RESV_PAGE:
		default:
			g_print("Unknown type %d\n", spec_info->type);
			continue;
		}
		list_push(send_bb_list, sview_bb_info_ptr);
	}
	list_iterator_destroy(itr);
	post_setup_popup_grid_list(popup_win);

	_update_info_bb(send_bb_list,
			  GTK_TREE_VIEW(spec_info->display_widget));
	FREE_NULL_LIST(send_bb_list);
end_it:
	popup_win->toggled = 0;
	popup_win->force_refresh = 0;

	return;
}
Beispiel #19
0
/*
 * sacctmgr_modify_account - handle "modify account" from the command line.
 * IN argc/argv - remaining command words: an optional "where" section
 *	selecting the accounts and a "set" section with the new values.
 * RET SLURM_SUCCESS or SLURM_ERROR (or the result of
 *	sacctmgr_remove_assoc_usage() for the raw-usage reset case).
 *
 * Bit 0 of rec_set requests account record changes; a value of 2 or 3
 * requests association changes.  Modified objects are listed and the
 * user is asked whether to commit.
 */
extern int sacctmgr_modify_account(int argc, char **argv)
{
	int rc = SLURM_SUCCESS;
	slurmdb_account_cond_t *acct_cond =
		xmalloc(sizeof(slurmdb_account_cond_t));
	slurmdb_account_rec_t *acct = xmalloc(sizeof(slurmdb_account_rec_t));
	slurmdb_assoc_rec_t *assoc =
		xmalloc(sizeof(slurmdb_assoc_rec_t));
	int idx;
	int cond_set = 0, rec_set = 0, changed = 0;
	List ret_list = NULL;
	ListIterator obj_itr = NULL;
	char *obj_name = NULL;

	slurmdb_init_assoc_rec(assoc, 0);

	/* Split the arguments into record ("set") and condition parts */
	for (idx = 0; idx < argc; idx++) {
		int arg_len = strlen(argv[idx]);

		if (!xstrncasecmp(argv[idx], "Set", MAX(arg_len, 3))) {
			idx++;
			rec_set |= _set_rec(&idx, argc, argv, NULL, NULL,
					    acct, assoc);
			continue;
		}

		/* The "Where" keyword is optional; anything that is not a
		 * "Set" section is parsed as a condition. */
		if (!xstrncasecmp(argv[idx], "Where", MAX(arg_len, 5)))
			idx++;
		cond_set |= _set_cond(&idx, argc, argv, acct_cond, NULL);
	}

	if (exit_code) {
		rc = SLURM_ERROR;
		goto free_recs;
	}

	if (!rec_set) {
		exit_code=1;
		fprintf(stderr, " You didn't give me anything to set\n");
		rc = SLURM_ERROR;
		goto free_recs;
	}

	if (!cond_set &&
	    !commit_check("You didn't set any conditions with 'WHERE'.\n"
			  "Are you sure you want to continue?")) {
		/* The user backed out; rc is still SLURM_SUCCESS */
		printf("Aborted\n");
		goto free_recs;
	}

	/* Special case: reset raw usage only */
	if (assoc->usage) {
		if (assoc->usage->usage_raw == 0.0) {
			rc = sacctmgr_remove_assoc_usage(acct_cond->assoc_cond);
		} else {
			rc = SLURM_ERROR;
			error("Raw usage can only be set to 0 (zero)");
		}
		goto free_recs;
	}

	notice_thread_init();

	/* Account record changes */
	if (rec_set & 1) {
		if (cond_set == 2) {
			exit_code=1;
			fprintf(stderr,
				" There was a problem with your "
				"'where' options.\n");
			rc = SLURM_ERROR;
		} else {
			ret_list = slurmdb_accounts_modify(
				db_conn, acct_cond, acct);
			if (!ret_list) {
				exit_code=1;
				fprintf(stderr, " Error with request: %s\n",
					slurm_strerror(errno));
				rc = SLURM_ERROR;
			} else if (!list_count(ret_list)) {
				printf(" Nothing modified\n");
				rc = SLURM_ERROR;
			} else {
				obj_itr = list_iterator_create(ret_list);
				printf(" Modified accounts...\n");
				for (obj_name = list_next(obj_itr); obj_name;
				     obj_name = list_next(obj_itr))
					printf("  %s\n", obj_name);
				list_iterator_destroy(obj_itr);
				changed = 1;
			}
			FREE_NULL_LIST(ret_list);
		}
	}

	/* Association changes */
	if ((rec_set == 2) || (rec_set == 3)) {
		if ((cond_set == 1) && !acct_cond->assoc_cond->acct_list) {
			rc = SLURM_ERROR;
			exit_code=1;
			fprintf(stderr,
				" There was a problem with your "
				"'where' options.\n");
			goto assoc_end;
		}

		if (assoc->parent_acct &&
		    !sacctmgr_find_account(assoc->parent_acct)) {
			exit_code=1;
			fprintf(stderr,
				" Parent Account %s doesn't exist.\n",
				assoc->parent_acct);
			rc = SLURM_ERROR;
			goto assoc_end;
		}

		ret_list = slurmdb_associations_modify(
			db_conn, acct_cond->assoc_cond, assoc);

		if (!ret_list) {
			exit_code=1;
			fprintf(stderr, " Error with request: %s\n",
				slurm_strerror(errno));
			rc = SLURM_ERROR;
		} else if (!list_count(ret_list)) {
			printf(" Nothing modified\n");
			rc = SLURM_ERROR;
		} else {
			/* The default QOS check, when it applies, decides
			 * whether anything is left to commit. */
			changed = 1;
			if (assoc->def_qos_id != NO_VAL)
				changed = sacctmgr_check_default_qos(
					assoc->def_qos_id,
					acct_cond->assoc_cond);
			else if (assoc->qos_list)
				changed = sacctmgr_check_default_qos(
					-1, acct_cond->assoc_cond);

			if (changed) {
				obj_itr = list_iterator_create(ret_list);
				printf(" Modified account associations...\n");
				for (obj_name = list_next(obj_itr); obj_name;
				     obj_name = list_next(obj_itr))
					printf("  %s\n", obj_name);
				list_iterator_destroy(obj_itr);
				changed = 1;
			}
		}

		FREE_NULL_LIST(ret_list);
	}

assoc_end:

	notice_thread_fini();
	if (changed) {
		if (commit_check("Would you like to commit changes?")) {
			slurmdb_connection_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			slurmdb_connection_commit(db_conn, 0);
		}
	}

free_recs:
	slurmdb_destroy_account_cond(acct_cond);
	slurmdb_destroy_account_rec(acct);
	slurmdb_destroy_assoc_rec(assoc);

	return rc;
}
Beispiel #20
0
/*
 * _thread_per_group_rpc - thread to issue an RPC for a group of nodes
 *                         sending message out to one and forwarding it to
 *                         others if necessary.
 * IN/OUT args - pointer to task_info_t, xfree'd on completion
 */
static void *_thread_per_group_rpc(void *args)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t msg;
	task_info_t *task_ptr = (task_info_t *) args;
	/* we cache some pointers from task_info_t because we need
	 * to xfree args before being finished with their use. xfree
	 * is required for timely termination of this pthread because
	 * xfree could lock it at the end, preventing a timely
	 * thread_exit */
	pthread_mutex_t *thread_mutex_ptr   = task_ptr->thread_mutex_ptr;
	pthread_cond_t  *thread_cond_ptr    = task_ptr->thread_cond_ptr;
	uint32_t        *threads_active_ptr = task_ptr->threads_active_ptr;
	thd_t           *thread_ptr         = task_ptr->thread_struct_ptr;
	state_t thread_state = DSH_NO_RESP;
	slurm_msg_type_t msg_type = task_ptr->msg_type;
	bool is_kill_msg, srun_agent;
	List ret_list = NULL;
	ListIterator itr;
	ret_data_info_t *ret_data_info = NULL;
	int found = 0;
	int sig_array[2] = {SIGUSR1, 0};
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };

	xassert(args != NULL);
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sig_array);
	is_kill_msg = (	(msg_type == REQUEST_KILL_TIMELIMIT)	||
			(msg_type == REQUEST_TERMINATE_JOB) );
	srun_agent = (	(msg_type == SRUN_PING)			||
			(msg_type == SRUN_EXEC)			||
			(msg_type == SRUN_JOB_COMPLETE)		||
			(msg_type == SRUN_STEP_MISSING)		||
			(msg_type == SRUN_TIMEOUT)		||
			(msg_type == SRUN_USER_MSG)		||
			(msg_type == RESPONSE_RESOURCE_ALLOCATION) ||
			(msg_type == SRUN_NODE_FAIL) );

	thread_ptr->start_time = time(NULL);

	slurm_mutex_lock(thread_mutex_ptr);
	thread_ptr->state = DSH_ACTIVE;
	thread_ptr->end_time = thread_ptr->start_time + COMMAND_TIMEOUT;
	slurm_mutex_unlock(thread_mutex_ptr);

	/* send request message */
	slurm_msg_t_init(&msg);
	msg.msg_type = msg_type;
	msg.data     = task_ptr->msg_args_ptr;
#if 0
 	info("sending message type %u to %s", msg_type, thread_ptr->nodelist);
#endif
	if (task_ptr->get_reply) {
		if(thread_ptr->addr) {
			msg.address = *thread_ptr->addr;

			if(!(ret_list = slurm_send_addr_recv_msgs(
				     &msg, thread_ptr->nodelist, 0))) {
				error("_thread_per_group_rpc: "
				      "no ret_list given");
				goto cleanup;
			}


		} else {
			if(!(ret_list = slurm_send_recv_msgs(
				     thread_ptr->nodelist,
				     &msg, 0, true))) {
				error("_thread_per_group_rpc: "
				      "no ret_list given");
				goto cleanup;
			}
		}
	} else {
		if(thread_ptr->addr) {
			//info("got the address");
			msg.address = *thread_ptr->addr;
		} else {
			//info("no address given");
			if(slurm_conf_get_addr(thread_ptr->nodelist,
					       &msg.address) == SLURM_ERROR) {
				error("_thread_per_group_rpc: "
				      "can't find address for host %s, "
				      "check slurm.conf",
				      thread_ptr->nodelist);
				goto cleanup;
			}
		}
		//info("sending %u to %s", msg_type, thread_ptr->nodelist);
		if (slurm_send_only_node_msg(&msg) == SLURM_SUCCESS) {
			thread_state = DSH_DONE;
		} else {
			if (!srun_agent)
				_comm_err(thread_ptr->nodelist, msg_type);
		}
		goto cleanup;
	}

	//info("got %d messages back", list_count(ret_list));
	found = 0;
	itr = list_iterator_create(ret_list);
	while ((ret_data_info = list_next(itr)) != NULL) {
		rc = slurm_get_return_code(ret_data_info->type,
					   ret_data_info->data);
		/* SPECIAL CASE: Mark node as IDLE if job already
		   complete */
		if (is_kill_msg &&
		    (rc == ESLURMD_KILL_JOB_ALREADY_COMPLETE)) {
			kill_job_msg_t *kill_job;
			kill_job = (kill_job_msg_t *)
				task_ptr->msg_args_ptr;
			rc = SLURM_SUCCESS;
			lock_slurmctld(job_write_lock);
			if (job_epilog_complete(kill_job->job_id,
						ret_data_info->
						node_name,
						rc))
				run_scheduler = true;
			unlock_slurmctld(job_write_lock);
		}
		/* SPECIAL CASE: Kill non-startable batch job,
		 * Requeue the job on ESLURMD_PROLOG_FAILED */
		if ((msg_type == REQUEST_BATCH_JOB_LAUNCH) &&
		    (rc != SLURM_SUCCESS) && (rc != ESLURMD_PROLOG_FAILED) &&
		    (ret_data_info->type != RESPONSE_FORWARD_FAILED)) {
			batch_job_launch_msg_t *launch_msg_ptr =
				task_ptr->msg_args_ptr;
			uint32_t job_id = launch_msg_ptr->job_id;
			info("Killing non-startable batch job %u: %s",
			     job_id, slurm_strerror(rc));
			thread_state = DSH_DONE;
			ret_data_info->err = thread_state;
			lock_slurmctld(job_write_lock);
			job_complete(job_id, 0, false, false, _wif_status());
			unlock_slurmctld(job_write_lock);
			continue;
		}

		if (((msg_type == REQUEST_SIGNAL_TASKS) ||
		     (msg_type == REQUEST_TERMINATE_TASKS)) &&
		     (rc == ESRCH)) {
			/* process is already dead, not a real error */
			rc = SLURM_SUCCESS;
		}

		switch (rc) {
		case SLURM_SUCCESS:
			/* debug("agent processed RPC to node %s", */
			/*       ret_data_info->node_name); */
			thread_state = DSH_DONE;
			break;
		case SLURM_UNKNOWN_FORWARD_ADDR:
			error("We were unable to forward message to '%s'.  "
			      "Make sure the slurm.conf for each slurmd "
			      "contain all other nodes in your system.",
			      ret_data_info->node_name);
			thread_state = DSH_NO_RESP;
			break;
		case ESLURMD_EPILOG_FAILED:
			error("Epilog failure on host %s, "
			      "setting DOWN",
			      ret_data_info->node_name);

			thread_state = DSH_FAILED;
			break;
		case ESLURMD_PROLOG_FAILED:
			thread_state = DSH_FAILED;
			break;
		case ESLURM_INVALID_JOB_ID:
			/* Not indicative of a real error */
		case ESLURMD_JOB_NOTRUNNING:
			/* Not indicative of a real error */
			debug2("agent processed RPC to node %s: %s",
			       ret_data_info->node_name,
			       slurm_strerror(rc));

			thread_state = DSH_DONE;
			break;
		default:
			if (!srun_agent) {
				if (ret_data_info->err)
					errno = ret_data_info->err;
				else
					errno = rc;
				rc = _comm_err(ret_data_info->node_name,
					       msg_type);
			}
			if (srun_agent)
				thread_state = DSH_FAILED;
			else if(ret_data_info->type == RESPONSE_FORWARD_FAILED)
				/* check if a forward failed */
				thread_state = DSH_NO_RESP;
			else {	/* some will fail that don't mean anything went
				 * bad like a job term request on a job that is
				 * already finished, we will just exit on those
				 * cases */
				thread_state = DSH_DONE;
			}
		}
		ret_data_info->err = thread_state;
	}
	list_iterator_destroy(itr);

cleanup:
	xfree(args);

	/* handled at end of thread just in case resend is needed */
	destroy_forward(&msg.forward);
	slurm_mutex_lock(thread_mutex_ptr);
	thread_ptr->ret_list = ret_list;
	thread_ptr->state = thread_state;
	thread_ptr->end_time = (time_t) difftime(time(NULL),
						 thread_ptr->start_time);
	/* Signal completion so another thread can replace us */
	(*threads_active_ptr)--;
	pthread_cond_signal(thread_cond_ptr);
	slurm_mutex_unlock(thread_mutex_ptr);
	return (void *) NULL;
}
Beispiel #21
0
extern int sacctmgr_add_qos(int argc, char *argv[])
{
	int rc = SLURM_SUCCESS;
	int i=0, limit_set=0;
	ListIterator itr = NULL;
	slurmdb_qos_rec_t *qos = NULL;
	slurmdb_qos_rec_t *start_qos = xmalloc(sizeof(slurmdb_qos_rec_t));
	List name_list = list_create(slurm_destroy_char);
	char *description = NULL;
	char *name = NULL;
	List qos_list = NULL;
	char *qos_str = NULL;

	slurmdb_init_qos_rec(start_qos, 0, NO_VAL);

	for (i=0; i<argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp (argv[i], "Where", MAX(command_len, 5))
		    || !strncasecmp (argv[i], "Set", MAX(command_len, 3)))
			i++;

		limit_set += _set_rec(&i, argc, argv, name_list, start_qos);
	}

	if (exit_code) {
		list_destroy(name_list);
		xfree(description);
		return SLURM_ERROR;
	} else if (!list_count(name_list)) {
		list_destroy(name_list);
		slurmdb_destroy_qos_rec(start_qos);
		exit_code=1;
		fprintf(stderr, " Need name of qos to add.\n");
		return SLURM_SUCCESS;
	}

	if (!g_qos_list) {
		g_qos_list = acct_storage_g_get_qos(db_conn, my_uid, NULL);

		if (!g_qos_list) {
			exit_code=1;
			fprintf(stderr, " Problem getting qos's "
				"from database.  "
				"Contact your admin.\n");
			list_destroy(name_list);
			xfree(description);
			return SLURM_ERROR;
		}
	}

	qos_list = list_create(slurmdb_destroy_qos_rec);

	itr = list_iterator_create(name_list);
	while((name = list_next(itr))) {
		qos = NULL;
		if (!sacctmgr_find_qos_from_list(g_qos_list, name)) {
			qos = xmalloc(sizeof(slurmdb_qos_rec_t));
			slurmdb_init_qos_rec(qos, 0, NO_VAL);
			qos->name = xstrdup(name);
			if (start_qos->description)
				qos->description =
					xstrdup(start_qos->description);
			else
				qos->description = xstrdup(name);

			qos->flags = start_qos->flags;
			qos->grace_time = start_qos->grace_time;
			qos->grp_cpu_mins = start_qos->grp_cpu_mins;
			qos->grp_cpu_run_mins = start_qos->grp_cpu_run_mins;
			qos->grp_cpus = start_qos->grp_cpus;
			qos->grp_jobs = start_qos->grp_jobs;
			qos->grp_mem = start_qos->grp_mem;
			qos->grp_nodes = start_qos->grp_nodes;
			qos->grp_submit_jobs = start_qos->grp_submit_jobs;
			qos->grp_wall = start_qos->grp_wall;

			qos->max_cpu_mins_pj = start_qos->max_cpu_mins_pj;
			qos->max_cpu_run_mins_pu =
				start_qos->max_cpu_run_mins_pu;
			qos->max_cpus_pj = start_qos->max_cpus_pj;
			qos->max_cpus_pu = start_qos->max_cpus_pu;
			qos->max_jobs_pu = start_qos->max_jobs_pu;
			qos->max_nodes_pj = start_qos->max_nodes_pj;
			qos->max_nodes_pu = start_qos->max_nodes_pu;
			qos->max_submit_jobs_pu = start_qos->max_submit_jobs_pu;
			qos->max_wall_pj = start_qos->max_wall_pj;

			qos->min_cpus_pj = start_qos->min_cpus_pj;

			qos->preempt_list =
				copy_char_list(start_qos->preempt_list);
			qos->preempt_mode = start_qos->preempt_mode;

			qos->priority = start_qos->priority;

			qos->usage_factor = start_qos->usage_factor;
			qos->usage_thres = start_qos->usage_thres;

			xstrfmtcat(qos_str, "  %s\n", name);
			list_append(qos_list, qos);
		}
	}
	list_iterator_destroy(itr);
	list_destroy(name_list);

	if (g_qos_list) {
		list_destroy(g_qos_list);
		g_qos_list = NULL;
	}

	if (!list_count(qos_list)) {
		printf(" Nothing new added.\n");
		rc = SLURM_ERROR;
		goto end_it;
	}

	if (qos_str) {
		printf(" Adding QOS(s)\n%s", qos_str);
		printf(" Settings\n");
		if (description)
			printf("  Description    = %s\n", description);
		else
			printf("  Description    = %s\n", "QOS Name");

		sacctmgr_print_qos_limits(start_qos);

		xfree(qos_str);
	}

	notice_thread_init();
	if (list_count(qos_list))
		rc = acct_storage_g_add_qos(db_conn, my_uid, qos_list);
	else
		goto end_it;

	notice_thread_fini();

	if (rc == SLURM_SUCCESS) {
		if (commit_check("Would you like to commit changes?")) {
			acct_storage_g_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	} else {
		exit_code=1;
		fprintf(stderr, " Problem adding QOS: %s\n",
			slurm_strerror(rc));
		rc = SLURM_ERROR;
	}

end_it:
	list_destroy(qos_list);
	xfree(description);

	return rc;
}
Beispiel #22
0
/*
 * _preempt_job_dequeue - drain preempt_job_list and apply each queued
 *	job's preemption action (suspend, cancel, checkpoint or requeue).
 *
 * A job whose action does not end with SLURM_SUCCESS, including one for
 * which no action branch applied, is sent SIGKILL as a fallback.  Jobs
 * that cannot be found, or whose mode is PREEMPT_MODE_OFF, are only
 * logged.
 */
static void _preempt_job_dequeue(void)
{
	uint32_t *id_ptr;

	xassert(preempt_job_list);
	for (id_ptr = list_pop(preempt_job_list); id_ptr;
	     id_ptr = list_pop(preempt_job_list)) {
		struct job_record *job_ptr;
		uint16_t mode;
		uint32_t job_id = *id_ptr;
		int rc = SLURM_ERROR;

		/* The list entry only carried the id; release it now */
		xfree(id_ptr);

		job_ptr = find_job_record(job_id);
		if (job_ptr == NULL) {
			error("_preempt_job_dequeue could not find job %u",
			      job_id);
			continue;
		}
		mode = slurm_job_preempt_mode(job_ptr);

		if (mode == PREEMPT_MODE_SUSPEND) {
			rc = _suspend_job(job_id);
			/* ESLURM_DISABLED is treated as success here */
			if (rc == ESLURM_DISABLED)
				rc = SLURM_SUCCESS;
		} else if (mode == PREEMPT_MODE_CANCEL) {
			rc = job_signal(job_ptr->job_id, SIGKILL, 0, 0, true);
			if (rc == SLURM_SUCCESS)
				info("preempted job %u has been killed",
				     job_ptr->job_id);
		} else if (mode == PREEMPT_MODE_CHECKPOINT) {
			checkpoint_msg_t ckpt_msg;

			/* Try checkpoint-and-requeue first, falling back to
			 * checkpoint-and-vacate when that is unsupported */
			memset(&ckpt_msg, 0, sizeof(checkpoint_msg_t));
			ckpt_msg.op     = CHECK_REQUEUE;
			ckpt_msg.job_id = job_ptr->job_id;
			rc = job_checkpoint(&ckpt_msg, 0, -1,
					    (uint16_t)NO_VAL);
			if (rc == ESLURM_NOT_SUPPORTED) {
				memset(&ckpt_msg, 0, sizeof(checkpoint_msg_t));
				ckpt_msg.op     = CHECK_VACATE;
				ckpt_msg.job_id = job_ptr->job_id;
				rc = job_checkpoint(&ckpt_msg, 0, -1,
						    (uint16_t)NO_VAL);
			}
			if (rc != SLURM_SUCCESS)
				error("preempted job %u could not be checkpointed: %s",
				      job_ptr->job_id, slurm_strerror(rc));
			else
				info("preempted job %u has been checkpointed",
				     job_ptr->job_id);
		} else if ((mode == PREEMPT_MODE_REQUEUE) &&
			   job_ptr->batch_flag && job_ptr->details &&
			   (job_ptr->details->requeue > 0)) {
			rc = job_requeue(0, job_ptr->job_id, NULL, true, 0);
			if (rc != SLURM_SUCCESS)
				error("preempted job %u could not be requeued: %s",
				      job_ptr->job_id, slurm_strerror(rc));
			else
				info("preempted job %u has been requeued",
				     job_ptr->job_id);
		} else if (mode == PREEMPT_MODE_OFF) {
			error("Invalid preempt_mode %u for job %u",
			      mode, job_ptr->job_id);
			continue;
		}

		if (rc == SLURM_SUCCESS)
			continue;

		/* Last resort: the chosen action failed or none applied */
		rc = job_signal(job_ptr->job_id, SIGKILL, 0, 0, true);
		if (rc != SLURM_SUCCESS) {
			info("%s: preempted job %u kill failure %s",
			     __func__, job_ptr->job_id,
			     slurm_strerror(rc));
		} else {
			info("%s: preempted job %u had to be killed",
			     __func__, job_ptr->job_id);
		}
	}

	return;
}
Beispiel #23
0
/*
 * Print "message: error description" on stderr for current errno value.
 * IN msg - prefix for the description; when NULL only the description
 *	is printed instead of handing a NULL pointer to "%s".
 */
void slurm_perror(char *msg)
{
	/* Capture errno first so that no later call can overwrite it */
	int saved_errno = errno;
	const char *err_str = slurm_strerror(saved_errno);

	if (msg)
		fprintf(stderr, "%s: %s\n", msg, err_str);
	else
		fprintf(stderr, "%s\n", err_str);
}
Beispiel #24
0
/*
 * sacctmgr_add_cluster - handle "add cluster" from the command line.
 * IN argc/argv - remaining command words: cluster names plus optional
 *	limits that seed each new cluster's root association.
 * RET SLURM_SUCCESS or an error code.
 *
 * Names that already exist in the database are reported and dropped.
 * The user is asked to confirm before the clusters are added.  All
 * lists owned by this function are released at end_it on every path.
 */
extern int sacctmgr_add_cluster(int argc, char *argv[])
{
	int rc = SLURM_SUCCESS;
	int i = 0;
	slurmdb_cluster_rec_t *cluster = NULL;
	List name_list = list_create(slurm_destroy_char);
	List cluster_list = NULL;
	slurmdb_association_rec_t start_assoc;

	int limit_set = 0;
	ListIterator itr = NULL, itr_c = NULL;
	char *name = NULL;
	uint16_t class = 0;

	slurmdb_init_association_rec(&start_assoc, 0);

	for (i=0; i<argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))
		    || !strncasecmp(argv[i], "Set", MAX(command_len, 3)))
			i++;
		limit_set += _set_rec(&i, argc, argv,
				      name_list, &start_assoc, &class);
	}
	if (exit_code) {
		rc = SLURM_ERROR;
		goto end_it;
	} else if (!list_count(name_list)) {
		exit_code=1;
		fprintf(stderr, " Need name of cluster to add.\n");
		rc = SLURM_ERROR;
		goto end_it;
	} else {
		List temp_list = NULL;
		slurmdb_cluster_cond_t cluster_cond;

		/* cluster_cond only borrows name_list; it is not freed */
		slurmdb_init_cluster_cond(&cluster_cond, 0);
		cluster_cond.cluster_list = name_list;
		cluster_cond.classification = class;

		temp_list = acct_storage_g_get_clusters(db_conn, my_uid,
							&cluster_cond);
		if (!temp_list) {
			exit_code=1;
			fprintf(stderr,
				" Problem getting clusters from database.  "
				"Contact your admin.\n");
			rc = SLURM_ERROR;
			goto end_it;
		}

		/* Drop every requested name that already exists */
		itr_c = list_iterator_create(name_list);
		itr = list_iterator_create(temp_list);
		while((name = list_next(itr_c))) {
			slurmdb_cluster_rec_t *cluster_rec = NULL;

			list_iterator_reset(itr);
			while((cluster_rec = list_next(itr))) {
				if (!strcasecmp(cluster_rec->name, name))
					break;
			}
			if (cluster_rec) {
				printf(" This cluster %s already exists.  "
				       "Not adding.\n", name);
				list_delete_item(itr_c);
			}
		}
		list_iterator_destroy(itr);
		list_iterator_destroy(itr_c);
		list_destroy(temp_list);
		if (!list_count(name_list)) {
			rc = SLURM_ERROR;
			goto end_it;
		}
	}

	printf(" Adding Cluster(s)\n");
	cluster_list = list_create(slurmdb_destroy_cluster_rec);
	itr = list_iterator_create(name_list);
	while((name = list_next(itr))) {
		if (!name[0]) {
			exit_code=1;
			fprintf(stderr, " No blank names are "
				"allowed when adding.\n");
			rc = SLURM_ERROR;
			continue;
		}
		cluster = xmalloc(sizeof(slurmdb_cluster_rec_t));
		slurmdb_init_cluster_rec(cluster, 0);

		list_append(cluster_list, cluster);
		cluster->flags = NO_VAL;
		cluster->name = xstrdup(name);
		cluster->classification = class;
		cluster->root_assoc =
			xmalloc(sizeof(slurmdb_association_rec_t));
		slurmdb_init_association_rec(cluster->root_assoc, 0);
		printf("  Name          = %s\n", cluster->name);
		if (cluster->classification)
			printf("  Classification= %s\n",
			       get_classification_str(cluster->classification));

		/* Seed the root association from the requested defaults */
		cluster->root_assoc->def_qos_id = start_assoc.def_qos_id;
		cluster->root_assoc->shares_raw = start_assoc.shares_raw;

		cluster->root_assoc->grp_cpus = start_assoc.grp_cpus;
		cluster->root_assoc->grp_jobs = start_assoc.grp_jobs;
		cluster->root_assoc->grp_mem = start_assoc.grp_mem;
		cluster->root_assoc->grp_nodes = start_assoc.grp_nodes;
		cluster->root_assoc->grp_submit_jobs =
			start_assoc.grp_submit_jobs;

		cluster->root_assoc->max_cpu_mins_pj =
			start_assoc.max_cpu_mins_pj;
		cluster->root_assoc->max_cpus_pj = start_assoc.max_cpus_pj;
		cluster->root_assoc->max_jobs = start_assoc.max_jobs;
		cluster->root_assoc->max_nodes_pj = start_assoc.max_nodes_pj;
		cluster->root_assoc->max_submit_jobs =
			start_assoc.max_submit_jobs;
		cluster->root_assoc->max_wall_pj = start_assoc.max_wall_pj;

		cluster->root_assoc->qos_list =
			copy_char_list(start_assoc.qos_list);
	}
	list_iterator_destroy(itr);
	list_destroy(name_list);
	name_list = NULL;

	if (limit_set) {
		printf(" Default Limits\n");
		sacctmgr_print_assoc_limits(&start_assoc);
	}

	if (!list_count(cluster_list)) {
		printf(" Nothing new added.\n");
		rc = SLURM_ERROR;
		goto end_it;
	}

	/* Since we are creating tables with add cluster that can't be
	   rolled back.  So we ask before hand if they are serious
	   about it so we can rollback if needed.
	*/
	if (commit_check("Would you like to commit changes?")) {
		notice_thread_init();
		rc = acct_storage_g_add_clusters(db_conn, my_uid, cluster_list);
		notice_thread_fini();
		if (rc == SLURM_SUCCESS) {
			acct_storage_g_commit(db_conn, 1);
		} else {
			exit_code=1;
			fprintf(stderr, " Problem adding clusters: %s\n",
				slurm_strerror(rc));
			/* this isn't really needed, but just to be safe */
			acct_storage_g_commit(db_conn, 0);
		}
	} else {
		printf(" Changes Discarded\n");
		/* this isn't really needed, but just to be safe */
		acct_storage_g_commit(db_conn, 0);
	}

end_it:
	/* Single cleanup point: each list is released only if still held */
	if (cluster_list)
		list_destroy(cluster_list);
	if (name_list)
		list_destroy(name_list);
	if (start_assoc.qos_list)
		list_destroy(start_assoc.qos_list);

	return rc;
}
Beispiel #25
0
/*
 * get_info_resv - build or refresh the main reservation page.
 * IN table - table the page widget is attached to.  When NULL no display
 *	work is done: with display_data also NULL the cached widget and
 *	data pointer are reset, otherwise only the menu setter is copied.
 * IN display_data - page description; when non-NULL it replaces
 *	local_display_data.
 *
 * The widget, view type and reservation data are kept in static
 * variables across calls.  The cursor of the main window is reset on
 * every exit.
 */
extern void get_info_resv(GtkTable *table, display_data_t *display_data)
{
	int error_code = SLURM_SUCCESS;
	List info_list = NULL;
	static int view = -1;
	static reserve_info_msg_t *resv_info_ptr = NULL;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	static GtkWidget *display_widget = NULL;
	int j=0;
	ListIterator itr = NULL;
	sview_resv_info_t *sview_resv_info_ptr = NULL;
	reserve_info_t *resv_ptr = NULL;
	time_t now = time(NULL);
	GtkTreePath *path = NULL;
	static bool set_opts = FALSE;

	/* Page options are installed once, on the first call */
	if (!set_opts)
		set_page_opts(RESV_PAGE, display_data_resv,
			      SORTID_CNT, _initial_page_opts);
	set_opts = TRUE;

	/* reset */
	if (!table && !display_data) {
		if (display_widget)
			gtk_widget_destroy(display_widget);
		display_widget = NULL;
		resv_info_ptr = NULL;
		goto reset_curs;
	}

	if (display_data)
		local_display_data = display_data;
	if (!table) {
		display_data_resv->set_menu = local_display_data->set_menu;
		goto reset_curs;
	}
	/* The user toggled the view: rebuild the widget from cached data */
	if (display_widget && toggled) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
		goto display_it;
	}

	error_code = get_new_info_resv(&resv_info_ptr, force_refresh);
	if (error_code == SLURM_NO_CHANGE_IN_DATA) {
		/* nothing new; fall through and redisplay */
	} else if (error_code != SLURM_SUCCESS) {
		/* Already showing the error label, nothing more to do */
		if (view == ERROR_VIEW)
			goto end_it;
		if (display_widget)
			gtk_widget_destroy(display_widget);
		view = ERROR_VIEW;
		/* Bounded write: the error text has no fixed maximum length
		 * and error_char is a small stack buffer. */
		snprintf(error_char, sizeof(error_char),
			 "slurm_load_reservations: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(table, label, 0, 1, 0, 1);
		gtk_widget_show(label);
		display_widget = gtk_widget_ref(GTK_WIDGET(label));
		goto end_it;
	}

display_it:
	info_list = _create_resv_info_list(resv_info_ptr);
	/* Note: this exit does not reset toggled/force_refresh */
	if (!info_list)
		goto reset_curs;
	/* set up the grid */
	if (display_widget && GTK_IS_TREE_VIEW(display_widget)
	    && gtk_tree_selection_count_selected_rows(
		    gtk_tree_view_get_selection(
			    GTK_TREE_VIEW(display_widget)))) {
		GtkTreeViewColumn *focus_column = NULL;
		/* highlight the correct nodes from the last selection */
		/* NOTE(review): path is never released in this function;
		 * the GTK documentation appears to require
		 * gtk_tree_path_free() on it - confirm and add if so. */
		gtk_tree_view_get_cursor(GTK_TREE_VIEW(display_widget),
					 &path, &focus_column);
	}
	if (!path) {
		/* No selection: colour the grid by current reservations */
		itr = list_iterator_create(info_list);
		while ((sview_resv_info_ptr = list_next(itr))) {
			resv_ptr = sview_resv_info_ptr->resv_ptr;
			if ((resv_ptr->start_time > now) ||
			    (resv_ptr->end_time   < now))
				continue;/* only map current reservations */
			/* node_inx is read as pairs of indexes, ending at
			 * the first negative entry */
			j=0;
			while (resv_ptr->node_inx[j] >= 0) {
				change_grid_color(grid_button_list,
						  resv_ptr->node_inx[j],
						  resv_ptr->node_inx[j+1],
						  sview_resv_info_ptr->
						  color_inx,
						  true, 0);
				j += 2;
			}
		}
		list_iterator_destroy(itr);
		change_grid_color(grid_button_list, -1, -1,
				  MAKE_WHITE, true, 0);
	} else
		highlight_grid(GTK_TREE_VIEW(display_widget),
			       SORTID_NODE_INX, SORTID_COLOR_INX,
			       grid_button_list);

	if (view == ERROR_VIEW && display_widget) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
	}
	if (!display_widget) {
		tree_view = create_treeview(local_display_data,
					    &grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		display_widget = gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, display_data_resv,
				 SORTID_CNT, SORTID_TIME_START, SORTID_COLOR);
	}

	view = INFO_VIEW;
	_update_info_resv(info_list, GTK_TREE_VIEW(display_widget));
end_it:
	toggled = FALSE;
	force_refresh = FALSE;
reset_curs:
	if (main_window && main_window->window)
		gdk_window_set_cursor(main_window->window, NULL);
	return;
}
Beispiel #26
0
/*
 * sacctmgr_modify_cluster - change the default limits (the "root"
 *	association) and/or the classification of the selected clusters.
 * IN argc - number of entries in argv
 * IN argv - option words; "Where" introduces conditions, "Set" introduces
 *	the new values, words following neither keyword are parsed as
 *	conditions
 * RET SLURM_SUCCESS on success or when the user declines to continue
 *	without conditions, SLURM_ERROR otherwise
 */
extern int sacctmgr_modify_cluster(int argc, char *argv[])
{
	int rc = SLURM_SUCCESS;
	int i=0;
	slurmdb_association_rec_t *assoc =
		xmalloc(sizeof(slurmdb_association_rec_t));
	slurmdb_association_cond_t *assoc_cond =
		xmalloc(sizeof(slurmdb_association_cond_t));
	int cond_set = 0, prev_set = 0, rec_set = 0, set = 0;
	List ret_list = NULL;
	uint16_t class_rec = 0;
	slurmdb_cluster_cond_t cluster_cond;

	slurmdb_init_association_rec(assoc, 0);

	assoc_cond->cluster_list = list_create(slurm_destroy_char);
	assoc_cond->acct_list = list_create(NULL);

	/* The cluster condition initially shares the association
	 * condition's cluster name list. */
	slurmdb_init_cluster_cond(&cluster_cond, 0);
	cluster_cond.cluster_list = assoc_cond->cluster_list;

	for (i=0; i<argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))) {
			i++;
			prev_set = _set_cond(&i, argc, argv,
					     &cluster_cond, NULL);
			cond_set |= prev_set;
		} else if (!strncasecmp(argv[i], "Set", MAX(command_len, 3))) {
			i++;
			prev_set = _set_rec(&i, argc, argv,
					    NULL, assoc, &class_rec);
			rec_set |= prev_set;
		} else {
			prev_set = _set_cond(&i, argc, argv,
					     &cluster_cond, NULL);
			cond_set |= prev_set;
		}
	}

	if (!rec_set) {
		exit_code=1;
		fprintf(stderr, " You didn't give me anything to set\n");
		rc = SLURM_ERROR;
		goto end_it;
	} else if (!cond_set) {
		if (!commit_check("You didn't set any conditions with 'WHERE'.\n"
				 "Are you sure you want to continue?")) {
			printf("Aborted\n");
			rc = SLURM_SUCCESS;
			goto end_it;
		}
	} else if (exit_code) {
		rc = SLURM_ERROR;
		goto end_it;
	}

	if (cond_set & 1) {
		List temp_list = NULL;

		temp_list = acct_storage_g_get_clusters(db_conn, my_uid,
							&cluster_cond);
		if (!temp_list) {
			exit_code=1;
			fprintf(stderr,
				" Problem getting clusters from database.  "
				"Contact your admin.\n");
			rc = SLURM_ERROR;
			goto end_it;
		} else if (!list_count(temp_list)) {
			fprintf(stderr,
				" Query didn't return any clusters.\n");
			/* The empty result is not handed to anyone,
			 * so release it here. */
			list_destroy(temp_list);
			rc = SLURM_ERROR;
			goto end_it;
		}
		/* We are only looking for the clusters returned from
		 * this query, so the association condition now uses
		 * that list.  The list it held before is still
		 * referenced by cluster_cond.cluster_list, which is
		 * passed to acct_storage_g_modify_clusters() below, so
		 * it must stay valid; it is released at end_it. */
		assoc_cond->cluster_list = temp_list;
	}

	printf(" Setting\n");
	if (rec_set) {
		printf(" Default Limits =\n");
		sacctmgr_print_assoc_limits(assoc);
		if (class_rec)
			printf(" Cluster Classification = %s\n",
			       get_classification_str(class_rec));
	}

	/* acct_list was created without a destructor, so the string
	 * literal is never freed by the list. */
	list_append(assoc_cond->acct_list, "root");
	notice_thread_init();
	ret_list = acct_storage_g_modify_associations(
		db_conn, my_uid, assoc_cond, assoc);

	if (ret_list && list_count(ret_list)) {
		char *object = NULL;
		ListIterator itr = list_iterator_create(ret_list);
		printf(" Modified cluster defaults for associations...\n");
		while((object = list_next(itr))) {
			printf("  %s\n", object);
		}
		list_iterator_destroy(itr);
		set = 1;
	} else if (ret_list) {
		printf(" Nothing modified\n");
		rc = SLURM_ERROR;
	} else {
		exit_code=1;
		fprintf(stderr, " Error with request: %s\n",
			slurm_strerror(errno));
		rc = SLURM_ERROR;
	}

	if (ret_list)
		list_destroy(ret_list);

	if (class_rec) {
		slurmdb_cluster_rec_t cluster_rec;

		slurmdb_init_cluster_rec(&cluster_rec, 0);
		/* the class has already returned these clusters so
		   just go with it */
		cluster_rec.classification = class_rec;

		ret_list = acct_storage_g_modify_clusters(
			db_conn, my_uid, &cluster_cond, &cluster_rec);

		if (ret_list && list_count(ret_list)) {
			char *object = NULL;
			ListIterator itr = list_iterator_create(ret_list);
			printf(" Modified cluster classifications...\n");
			while((object = list_next(itr))) {
				printf("  %s\n", object);
			}
			list_iterator_destroy(itr);
			set = 1;
		} else if (ret_list) {
			printf(" Nothing modified\n");
			rc = SLURM_ERROR;
		} else {
			exit_code=1;
			fprintf(stderr, " Error with request: %s\n",
				slurm_strerror(errno));
			rc = SLURM_ERROR;
		}

		if (ret_list)
			list_destroy(ret_list);
	}

	notice_thread_fini();

	if (set) {
		if (commit_check("Would you like to commit changes?"))
			acct_storage_g_commit(db_conn, 1);
		else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	}
end_it:
	/* When the association condition was switched to the query
	 * result, the original name list is only reachable through
	 * cluster_cond and has to be freed separately.  Otherwise both
	 * point at the same list, which is freed with assoc_cond. */
	if (cluster_cond.cluster_list &&
	    (cluster_cond.cluster_list != assoc_cond->cluster_list))
		list_destroy(cluster_cond.cluster_list);
	slurmdb_destroy_association_cond(assoc_cond);
	slurmdb_destroy_association_rec(assoc);

	return rc;
}
Beispiel #27
0
/*
 * _verify_job_ids - match the job ids requested in "opt" against the
 *	records in job_buffer_ptr.
 *
 * Side effects:
 *  - allocates opt.job_found and opt.job_pend (one flag per request);
 *  - sets job_id to 0 for every job record that is finished or that no
 *    request refers to;
 *  - logs an error (unless opt.verbose < 0) for each request that
 *    matched no job, and sets that request's opt.job_id entry to 0.
 *
 * RET 0 if every request matched at least one job (or no job ids were
 *	requested), 1 otherwise
 */
static int _verify_job_ids(void)
{
	job_info_t *job_ptr;
	int i, j, rc = 0;

	if (opt.job_cnt == 0)
		return rc;

	opt.job_found = xmalloc(sizeof(bool) * opt.job_cnt);
	opt.job_pend  = xmalloc(sizeof(bool) * opt.job_cnt);
	job_ptr = job_buffer_ptr->job_array;
	for (i = 0; i < job_buffer_ptr->record_count; i++, job_ptr++) {
		/* NOTE: We re-use the job's "assoc_id" value as a flag to
		 * record if the job is referenced in the job list supplied
		 * by the user. */
		job_ptr->assoc_id = 0;
		if (IS_JOB_FINISHED(job_ptr))
			job_ptr->job_id = 0;
		if (job_ptr->job_id == 0)
			continue;

		for (j = 0; j < opt.job_cnt; j++) {
			/* Whether THIS job satisfies request j.  This must
			 * not be derived from opt.job_found[j]: that flag
			 * stays set once any earlier job matched, and
			 * would mark unrelated jobs as referenced. */
			bool match = false;

			if (opt.array_id[j] == NO_VAL) {
				if ((opt.job_id[j] == job_ptr->job_id) ||
				    ((opt.job_id[j] == job_ptr->array_job_id) &&
				     (opt.step_id[j] == SLURM_BATCH_SCRIPT))) {
					match = true;
				}
			} else if (opt.array_id[j] == INFINITE) {
				if (opt.job_id[j] == job_ptr->array_job_id) {
					match = true;
				}
			} else if (opt.job_id[j] != job_ptr->array_job_id) {
				match = false;
			} else if (_is_task_in_job(job_ptr, opt.array_id[j])) {
				match = true;
			}

			if (!match)
				continue;

			opt.job_found[j] = true;
			if (IS_JOB_PENDING(job_ptr))
				opt.job_pend[j] = true;
			job_ptr->assoc_id = 1;
		}
		if (job_ptr->assoc_id == 0)
			job_ptr->job_id = 0;
	}

	for (j = 0; j < opt.job_cnt; j++) {
		char *job_id_str = NULL;
		if (!opt.job_found[j])
			rc = 1;
		else
			continue;

		/* Build the printable id only when it will be logged */
		if (opt.verbose < 0) {
			;
		} else if (opt.array_id[j] == NO_VAL) {
			xstrfmtcat(job_id_str, "%u", opt.job_id[j]);
		} else if (opt.array_id[j] == INFINITE) {
			xstrfmtcat(job_id_str, "%u_*", opt.job_id[j]);
		} else {
			xstrfmtcat(job_id_str, "%u_%u", opt.job_id[j],
				   opt.array_id[j]);
		}

		if (opt.verbose < 0) {
			;
		} else if (opt.step_id[j] == SLURM_BATCH_SCRIPT) {
			error("Kill job error on job id %s: %s",
			      job_id_str,
			      slurm_strerror(ESLURM_INVALID_JOB_ID));
		} else {
			error("Kill job error on job step id %s.%u: %s",
			      job_id_str, opt.step_id[j],
			      slurm_strerror(ESLURM_INVALID_JOB_ID));
		}
		xfree(job_id_str);

		/* Avoid this job in the cancel_job logic */
		opt.job_id[j] = 0;
	}

	return rc;
}
Beispiel #28
0
/*
 * sacctmgr_delete_cluster - remove the clusters selected on the command
 *	line from accounting storage, after asking the user to confirm.
 * IN argc - number of entries in argv
 * IN argv - condition words; a leading "Where" or "Set" keyword is
 *	skipped
 * RET SLURM_ERROR if parsing failed, no condition was given, nothing was
 *	deleted or the request failed; SLURM_SUCCESS if the conditions
 *	named neither a cluster nor a classification; otherwise the errno
 *	value saved right after the remove request
 */
extern int sacctmgr_delete_cluster(int argc, char *argv[])
{
	int rc = SLURM_SUCCESS;
	slurmdb_cluster_cond_t *cluster_cond =
		xmalloc(sizeof(slurmdb_cluster_cond_t));
	int i=0;
	List ret_list = NULL;
	int cond_set = 0, prev_set;

	slurmdb_init_cluster_cond(cluster_cond, 0);
	cluster_cond->cluster_list = list_create(slurm_destroy_char);

	for (i=0; i<argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))
		    || !strncasecmp(argv[i], "Set", MAX(command_len, 3)))
			i++;
		prev_set = _set_cond(&i, argc, argv, cluster_cond, NULL);
		cond_set |= prev_set;
	}

	if (exit_code) {
		slurmdb_destroy_cluster_cond(cluster_cond);
		return SLURM_ERROR;
	} else if (!cond_set) {
		exit_code=1;
		fprintf(stderr,
			" No conditions given to remove, not executing.\n");
		slurmdb_destroy_cluster_cond(cluster_cond);
		return SLURM_ERROR;
	}

	if (!list_count(cluster_cond->cluster_list)
	   && !cluster_cond->classification) {
		exit_code=1;
		fprintf(stderr,
			"problem with delete request.  "
			"Nothing given to delete.\n");
		slurmdb_destroy_cluster_cond(cluster_cond);
		return SLURM_SUCCESS;
	}
	notice_thread_init();
	ret_list = acct_storage_g_remove_clusters(
		db_conn, my_uid, cluster_cond);
	/* Save errno now: the calls below may overwrite it before the
	 * result is examined. */
	rc = errno;
	notice_thread_fini();

	slurmdb_destroy_cluster_cond(cluster_cond);

	if (ret_list && list_count(ret_list)) {
		char *object = NULL;
		ListIterator itr = list_iterator_create(ret_list);
		/* If there were jobs running with an association to
		   be deleted, don't.
		*/
		if (rc == ESLURM_JOBS_RUNNING_ON_ASSOC) {
			fprintf(stderr, " Error with request: %s\n",
				slurm_strerror(rc));
			while((object = list_next(itr))) {
				fprintf(stderr,"  %s\n", object);
			}
			/* Release the iterator before its list, as the
			 * normal path below does. */
			list_iterator_destroy(itr);
			list_destroy(ret_list);
			acct_storage_g_commit(db_conn, 0);
			return rc;
		}
		printf(" Deleting clusters...\n");
		while((object = list_next(itr))) {
			printf("  %s\n", object);
		}
		list_iterator_destroy(itr);
		if (commit_check("Would you like to commit changes?")) {
			acct_storage_g_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	} else if (ret_list) {
		printf(" Nothing deleted\n");
		rc = SLURM_ERROR;
	} else {
		exit_code=1;
		/* Report the errno saved right after the request, not
		 * whatever errno holds after the cleanup calls. */
		fprintf(stderr, " Error with request: %s\n",
			slurm_strerror(rc));
		rc = SLURM_ERROR;
	}

	if (ret_list)
		list_destroy(ret_list);

	return rc;
}
Beispiel #29
0
/*
 * _cancel_step_id - signal or terminate a single job step, retrying while
 *	the controller reports the job as pending or in a transitional
 *	state.
 * IN ci - pointer to a job_cancel_info_t describing the request.  The
 *	structure and its job_id_str are freed here; the lock, condition
 *	variable and counters it points to are left alone.
 * RET always NULL.  The outcome is merged into *(cancel_info->rc) and the
 *	active-thread counter is decremented and signalled under its lock.
 */
static void *
_cancel_step_id (void *ci)
{
	job_cancel_info_t *cancel_info = (job_cancel_info_t *) ci;
	uint32_t job_id  = cancel_info->job_id;
	uint32_t step_id = cancel_info->step_id;
	/* false when the caller left the signal unspecified */
	bool sig_given = (cancel_info->sig != (uint16_t) NO_VAL);
	int attempt, error_code = SLURM_SUCCESS;
	DEF_TIMERS;

	/* No explicit signal means a plain kill */
	if (!sig_given)
		cancel_info->sig = SIGKILL;

	/* Build the printable job id once if the caller did not */
	if (!cancel_info->job_id_str) {
		if (!cancel_info->array_job_id) {
			xstrfmtcat(cancel_info->job_id_str, "%u",
				   cancel_info->job_id);
		} else if (cancel_info->array_task_id == INFINITE) {
			xstrfmtcat(cancel_info->job_id_str, "%u_*",
				   cancel_info->array_job_id);
		} else {
			xstrfmtcat(cancel_info->job_id_str, "%u_%u",
				   cancel_info->array_job_id,
				   cancel_info->array_task_id);
		}
	}

	for (attempt = 0; attempt < MAX_CANCEL_RETRY; attempt++) {
		if (cancel_info->sig != SIGKILL) {
			verbose("Signal %u to step %s.%u",
				cancel_info->sig,
				cancel_info->job_id_str, step_id);
		} else {
			verbose("Terminating step %s.%u",
				cancel_info->job_id_str, step_id);
		}

		_add_delay();
		START_TIMER;
		if (!sig_given || opt.ctld) {
			error_code = slurm_kill_job_step(job_id, step_id,
							 cancel_info->sig);
		} else if (cancel_info->sig == SIGKILL) {
			error_code = slurm_terminate_job_step(job_id, step_id);
		} else {
			error_code = slurm_signal_job_step(job_id, step_id,
							   cancel_info->sig);
		}
		END_TIMER;

		/* Track the slowest response seen so far */
		slurm_mutex_lock(&max_delay_lock);
		max_resp_time = MAX(max_resp_time, DELTA_TIMER);
		slurm_mutex_unlock(&max_delay_lock);

		if (error_code == 0)
			break;
		/* Only these two errors are worth another attempt */
		if ((errno != ESLURM_TRANSITION_STATE_NO_UPDATE) &&
		    (errno != ESLURM_JOB_PENDING))
			break;

		verbose("Job is in transistional state, retrying");
		sleep(5 + attempt);
	}

	if (error_code) {
		error_code = slurm_get_errno();
		if ((error_code != ESLURM_ALREADY_DONE) || (opt.verbose > 0)) {
			error("Kill job error on job step id %s: %s",
			      cancel_info->job_id_str,
			      slurm_strerror(slurm_get_errno()));
		}

		/* Ignore error if job done */
		if ((cancel_info->sig == SIGKILL) &&
		    (error_code == ESLURM_ALREADY_DONE))
			error_code = 0;
	}

	/* Publish the result and report completion to whoever waits on
	 * the condition variable.  The request structure is freed here so
	 * the caller need not track it; the mutex and condition variable
	 * it refers to are deliberately not destroyed. */
	slurm_mutex_lock(cancel_info->num_active_threads_lock);
	*(cancel_info->rc) = MAX(*(cancel_info->rc), error_code);
	(*(cancel_info->num_active_threads))--;
	slurm_cond_signal(cancel_info->num_active_threads_cond);
	slurm_mutex_unlock(cancel_info->num_active_threads_lock);

	xfree(cancel_info->job_id_str);
	xfree(cancel_info);
	return NULL;
}
Beispiel #30
0
extern int sacctmgr_list_problem(int argc, char *argv[])
{
	int rc = SLURM_SUCCESS;
	slurmdb_association_cond_t *assoc_cond =
		xmalloc(sizeof(slurmdb_association_cond_t));
	List assoc_list = NULL;
	slurmdb_association_rec_t *assoc = NULL;
	int i=0;
	ListIterator itr = NULL;
	ListIterator itr2 = NULL;
	List tree_list = NULL;

	int field_count = 0;

	print_field_t *field = NULL;

	List format_list = list_create(slurm_destroy_char);
	List print_fields_list; /* types are of print_field_t */

	for (i=0; i<argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp (argv[i], "Where", MAX(command_len, 5))
		    || !strncasecmp (argv[i], "Set", MAX(command_len, 3)))
			i++;
		_set_cond(&i, argc, argv, assoc_cond, format_list);
	}

	if (exit_code) {
		slurmdb_destroy_association_cond(assoc_cond);
		list_destroy(format_list);
		return SLURM_ERROR;
	} else if (!list_count(format_list))
		slurm_addto_char_list(format_list, "Cl,Acct,User,Problem");

	print_fields_list = sacctmgr_process_format_list(format_list);
	list_destroy(format_list);

	if (exit_code) {
		slurmdb_destroy_association_cond(assoc_cond);
		list_destroy(print_fields_list);
		return SLURM_ERROR;
	}

	assoc_list = acct_storage_g_get_problems(db_conn, my_uid, assoc_cond);
	slurmdb_destroy_association_cond(assoc_cond);

	if (!assoc_list) {
		exit_code=1;
		fprintf(stderr, " Error with request: %s\n",
			slurm_strerror(errno));
		list_destroy(print_fields_list);
		return SLURM_ERROR;
	}

	itr = list_iterator_create(assoc_list);
	itr2 = list_iterator_create(print_fields_list);
	print_fields_header(print_fields_list);

	field_count = list_count(print_fields_list);

	while((assoc = list_next(itr))) {
		int curr_inx = 1;
		while((field = list_next(itr2))) {
			switch(field->type) {
			case PRINT_ACCT:
				field->print_routine(
					field,
					assoc->acct,
					(curr_inx == field_count));
				break;
			case PRINT_CLUSTER:
				field->print_routine(
					field,
					assoc->cluster,
					(curr_inx == field_count));
				break;
			case PRINT_PROBLEM:
				/* make some sort of string here to
				   print out the problem reported.
				   Maybe make an array or something
				   and just print out a standard error.
				*/
				field->print_routine(
					field,
					slurmdb_problem_str_get(assoc->id),
					(curr_inx == field_count));
				break;
			case PRINT_USER:
				field->print_routine(field,
						     assoc->user,
						     (curr_inx == field_count));
				break;
			default:
				field->print_routine(
					field, NULL,
					(curr_inx == field_count));
				break;
			}
			curr_inx++;
		}
		list_iterator_reset(itr2);
		printf("\n");
	}

	if (tree_list)
		list_destroy(tree_list);

	list_iterator_destroy(itr2);
	list_iterator_destroy(itr);
	list_destroy(assoc_list);
	list_destroy(print_fields_list);
	tree_display = 0;
	return rc;
}