Example #1
static int _attempt_backfill(void)
{
	DEF_TIMERS;
	bool filter_root = false;
	List job_queue;
	job_queue_rec_t *job_queue_rec;
	slurmdb_qos_rec_t *qos_ptr = NULL;
	int i, j, node_space_recs;
	struct job_record *job_ptr;
	struct part_record *part_ptr;
	uint32_t end_time, end_reserve;
	uint32_t time_limit, comp_time_limit, orig_time_limit;
	uint32_t min_nodes, max_nodes, req_nodes;
	bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
	bitstr_t *exc_core_bitmap = NULL;
	time_t now, sched_start, later_start, start_res, resv_end;
	node_space_map_t *node_space;
	struct timeval bf_time1, bf_time2;
	static int sched_timeout = 0;
	int this_sched_timeout = 0, rc = 0;
	int job_test_count = 0;
	uint32_t *uid = NULL, nuser = 0;
	uint16_t *njobs = NULL;
	bool already_counted;

#ifdef HAVE_CRAY
	/*
	 * Run a Basil Inventory immediately before setting up the schedule
	 * plan, to avoid race conditions caused by ALPS node state change.
	 * Needs to be done with the node-state lock taken.
	 */
	START_TIMER;
	if (select_g_reconfigure()) {
		debug4("backfill: not scheduling due to ALPS");
		return SLURM_SUCCESS;
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		info("backfill: ALPS inventory completed, %s", TIME_STR);

	/* The Basil inventory can take a long time to complete. Process
	 * pending RPCs before starting the backfill scheduling logic */
	_yield_locks(1);
#endif

	START_TIMER;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		info("backfill: beginning");
	sched_start = now = time(NULL);
	if (sched_timeout == 0) {
		sched_timeout = slurm_get_msg_timeout() / 2;
		sched_timeout = MAX(sched_timeout, 1);
		sched_timeout = MIN(sched_timeout, 10);
	}
	this_sched_timeout = sched_timeout;

	if (slurm_get_root_filter())
		filter_root = true;

	job_queue = build_job_queue(true);
	if (list_count(job_queue) == 0) {
		debug("backfill: no jobs to backfill");
		list_destroy(job_queue);
		return 0;
	}

	gettimeofday(&bf_time1, NULL);

	slurmctld_diag_stats.bf_queue_len = list_count(job_queue);
	slurmctld_diag_stats.bf_queue_len_sum += slurmctld_diag_stats.
						 bf_queue_len;
	slurmctld_diag_stats.bf_last_depth = 0;
	slurmctld_diag_stats.bf_last_depth_try = 0;
	slurmctld_diag_stats.bf_when_last_cycle = now;
	bf_last_ints = 0;
	slurmctld_diag_stats.bf_active = 1;

	node_space = xmalloc(sizeof(node_space_map_t) *
			     (max_backfill_job_cnt + 3));
	node_space[0].begin_time = sched_start;
	node_space[0].end_time = sched_start + backfill_window;
	node_space[0].avail_bitmap = bit_copy(avail_node_bitmap);
	node_space[0].next = 0;
	node_space_recs = 1;
	if (debug_flags & DEBUG_FLAG_BACKFILL)
		_dump_node_space_table(node_space);

	if (max_backfill_job_per_user) {
		uid = xmalloc(BF_MAX_USERS * sizeof(uint32_t));
		njobs = xmalloc(BF_MAX_USERS * sizeof(uint16_t));
	}
	while ((job_queue_rec = (job_queue_rec_t *)
				list_pop_bottom(job_queue, sort_job_queue2))) {
		job_test_count++;
		job_ptr  = job_queue_rec->job_ptr;
		part_ptr = job_queue_rec->part_ptr;
		xfree(job_queue_rec);
		if (!IS_JOB_PENDING(job_ptr))
			continue;	/* started in other partition */
		job_ptr->part_ptr = part_ptr;

		if (debug_flags & DEBUG_FLAG_BACKFILL)
			info("backfill test for job %u", job_ptr->job_id);

		slurmctld_diag_stats.bf_last_depth++;
		already_counted = false;

		if (max_backfill_job_per_user) {
			for (j = 0; j < nuser; j++) {
				if (job_ptr->user_id == uid[j]) {
					njobs[j]++;
					debug2("backfill: user %u: #jobs %u",
					       uid[j], njobs[j]);
					break;
				}
			}
			if (j == nuser) { /* user not found */
				if (nuser < BF_MAX_USERS) {
					uid[j] = job_ptr->user_id;
					njobs[j] = 1;
					nuser++;
				} else {
					error("backfill: too many users in "
					      "queue. Consider increasing "
					      "BF_MAX_USERS");
				}
				debug2("backfill: found new user %u. "
				       "Total #users now %u",
				       job_ptr->user_id, nuser);
			} else {
				if (njobs[j] > max_backfill_job_per_user) {
					/* skip job */
					debug("backfill: have already checked "
					      "%u jobs for user %u; skipping "
					      "job %u",
					      max_backfill_job_per_user,
					      job_ptr->user_id,
					      job_ptr->job_id);
					continue;
				}
			}
		}

		if (((part_ptr->state_up & PARTITION_SCHED) == 0) ||
		    (part_ptr->node_bitmap == NULL))
			continue;
		if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root)
			continue;

		if ((!job_independent(job_ptr, 0)) ||
		    (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS))
			continue;

		/* Determine minimum and maximum node counts */
		min_nodes = MAX(job_ptr->details->min_nodes,
				part_ptr->min_nodes);
		if (job_ptr->details->max_nodes == 0)
			max_nodes = part_ptr->max_nodes;
		else
			max_nodes = MIN(job_ptr->details->max_nodes,
					part_ptr->max_nodes);
		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
		if (job_ptr->details->max_nodes)
			req_nodes = max_nodes;
		else
			req_nodes = min_nodes;
		if (min_nodes > max_nodes) {
			/* job's min_nodes exceeds partition's max_nodes */
			continue;
		}

		/* Determine job's expected completion time */
		if (job_ptr->time_limit == NO_VAL) {
			if (part_ptr->max_time == INFINITE)
				time_limit = 365 * 24 * 60; /* one year */
			else
				time_limit = part_ptr->max_time;
		} else {
			if (part_ptr->max_time == INFINITE)
				time_limit = job_ptr->time_limit;
			else
				time_limit = MIN(job_ptr->time_limit,
						 part_ptr->max_time);
		}
		comp_time_limit = time_limit;
		orig_time_limit = job_ptr->time_limit;
		qos_ptr = job_ptr->qos_ptr;
		if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE) &&
		    slurm_get_preempt_mode())
			time_limit = job_ptr->time_limit = 1;
		else if (job_ptr->time_min && (job_ptr->time_min < time_limit))
			time_limit = job_ptr->time_limit = job_ptr->time_min;

		/* Determine impact of any resource reservations */
		later_start = now;
 TRY_LATER:	FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		start_res   = later_start;
		later_start = 0;
		j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap,
				  &exc_core_bitmap);
		if (j != SLURM_SUCCESS) {
			job_ptr->time_limit = orig_time_limit;
			continue;
		}
		if (start_res > now)
			end_time = (time_limit * 60) + start_res;
		else
			end_time = (time_limit * 60) + now;
		resv_end = find_resv_end(start_res);
		/* Identify usable nodes for this job */
		bit_and(avail_bitmap, part_ptr->node_bitmap);
		bit_and(avail_bitmap, up_node_bitmap);
		for (j=0; ; ) {
			if ((node_space[j].end_time > start_res) &&
			     node_space[j].next && (later_start == 0))
				later_start = node_space[j].end_time;
			if (node_space[j].end_time <= start_res)
				;
			else if (node_space[j].begin_time <= end_time) {
				bit_and(avail_bitmap,
					node_space[j].avail_bitmap);
			} else
				break;
			if ((j = node_space[j].next) == 0)
				break;
		}
		if ((resv_end++) &&
		    ((later_start == 0) || (resv_end < later_start))) {
			later_start = resv_end;
		}

		if (job_ptr->details->exc_node_bitmap) {
			bit_not(job_ptr->details->exc_node_bitmap);
			bit_and(avail_bitmap,
				job_ptr->details->exc_node_bitmap);
			bit_not(job_ptr->details->exc_node_bitmap);
		}

		/* Test if insufficient nodes remain OR
		 *	required nodes missing OR
		 *	nodes lack features */
		if ((bit_set_count(avail_bitmap) < min_nodes) ||
		    ((job_ptr->details->req_node_bitmap) &&
		     (!bit_super_set(job_ptr->details->req_node_bitmap,
				     avail_bitmap))) ||
		    (job_req_node_filter(job_ptr, avail_bitmap))) {
			if (later_start) {
				job_ptr->start_time = 0;
				goto TRY_LATER;
			}
			/* Job cannot start until too far in the future */
			job_ptr->time_limit = orig_time_limit;
			job_ptr->start_time = sched_start + backfill_window;
			continue;
		}

		/* Identify nodes which are definitely off limits */
		FREE_NULL_BITMAP(resv_bitmap);
		resv_bitmap = bit_copy(avail_bitmap);
		bit_not(resv_bitmap);

		if ((time(NULL) - sched_start) >= this_sched_timeout) {
			uint32_t save_time_limit = job_ptr->time_limit;
			job_ptr->time_limit = orig_time_limit;
			if (debug_flags & DEBUG_FLAG_BACKFILL) {
				END_TIMER;
				info("backfill: yielding locks after testing "
				     "%d jobs, %s",
				     job_test_count, TIME_STR);
			}
			if (_yield_locks(backfill_interval)) {
				if (debug_flags & DEBUG_FLAG_BACKFILL) {
					info("backfill: system state changed, "
					     "breaking out after testing %d "
					     "jobs", job_test_count);
				}
				rc = 1;
				break;
			}
			job_ptr->time_limit = save_time_limit;
			/* Reset backfill scheduling timers, resume testing */
			sched_start = time(NULL);
			job_test_count = 0;
			START_TIMER;
		}
		/* This is the time-consuming operation */
		debug2("backfill: entering _try_sched for job %u.",
		       job_ptr->job_id);

		if (!already_counted) {
			slurmctld_diag_stats.bf_last_depth_try++;
			already_counted = true;
		}

		j = _try_sched(job_ptr, &avail_bitmap, min_nodes, max_nodes,
			       req_nodes, exc_core_bitmap);

		now = time(NULL);
		if (j != SLURM_SUCCESS) {
			job_ptr->time_limit = orig_time_limit;
			job_ptr->start_time = 0;
			continue;	/* not runnable */
		}

		if (start_res > job_ptr->start_time) {
			job_ptr->start_time = start_res;
			last_job_update = now;
		}
		if (job_ptr->start_time <= now) {
			int rc = _start_job(job_ptr, resv_bitmap);
			if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)){
				if (orig_time_limit == NO_VAL)
					orig_time_limit = comp_time_limit;
				job_ptr->time_limit = orig_time_limit;
				job_ptr->end_time = job_ptr->start_time +
						    (orig_time_limit * 60);
			}
			else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) {
				/* Set time limit as high as possible */
				job_ptr->time_limit = comp_time_limit;
				job_ptr->end_time = job_ptr->start_time +
						    (comp_time_limit * 60);
				_reset_job_time_limit(job_ptr, now,
						      node_space);
				time_limit = job_ptr->time_limit;
			} else {
				job_ptr->time_limit = orig_time_limit;
			}
			if (rc == ESLURM_ACCOUNTING_POLICY) {
				/* Unknown future start time, just skip job */
				job_ptr->start_time = 0;
				continue;
			} else if (rc != SLURM_SUCCESS) {
				/* Planned to start job, but something bad
				 * happened. */
				job_ptr->start_time = 0;
				break;
			} else {
				/* Started this job, move to next one */
				continue;
			}
		} else
			job_ptr->time_limit = orig_time_limit;

		if (later_start && (job_ptr->start_time > later_start)) {
			/* Try later when some nodes currently reserved for
			 * pending jobs are free */
			job_ptr->start_time = 0;
			goto TRY_LATER;
		}

		if (job_ptr->start_time > (sched_start + backfill_window)) {
			/* Starts too far in the future to worry about */
			continue;
		}

		if (node_space_recs >= max_backfill_job_cnt) {
			/* Already have too many jobs to deal with */
			break;
		}

		end_reserve = job_ptr->start_time + (time_limit * 60);
		if (_test_resv_overlap(node_space, avail_bitmap,
				       job_ptr->start_time, end_reserve)) {
			/* This job overlaps with an existing reservation for
			 * a job to be backfill scheduled, which the sched
			 * plugin does not know about. Try again later. */
			later_start = job_ptr->start_time;
			job_ptr->start_time = 0;
			goto TRY_LATER;
		}

		/*
		 * Add reservation to scheduling table if appropriate
		 */
		if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
			continue;
		bit_not(avail_bitmap);
		_add_reservation(job_ptr->start_time, end_reserve,
				 avail_bitmap, node_space, &node_space_recs);
		if (debug_flags & DEBUG_FLAG_BACKFILL)
			_dump_node_space_table(node_space);
	}
	xfree(uid);
	xfree(njobs);
	FREE_NULL_BITMAP(avail_bitmap);
	FREE_NULL_BITMAP(exc_core_bitmap);
	FREE_NULL_BITMAP(resv_bitmap);

	for (i=0; ; ) {
		FREE_NULL_BITMAP(node_space[i].avail_bitmap);
		if ((i = node_space[i].next) == 0)
			break;
	}
	xfree(node_space);
	list_destroy(job_queue);
	gettimeofday(&bf_time2, NULL);
	_do_diag_stats(&bf_time1, &bf_time2);
	if (debug_flags & DEBUG_FLAG_BACKFILL) {
		END_TIMER;
		info("backfill: completed testing %d jobs, %s",
		     job_test_count, TIME_STR);
	}
	return rc;
}
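
A note on the structure above: the node_space table is the scheduler's picture of future availability, a linked list of time windows where each record holds the nodes still free in [begin_time, end_time). To find what a job could use over a candidate interval, the loop ANDs together the bitmaps of every overlapping window. Below is a minimal, self-contained sketch of that idea; the struct and the 64-bit mask standing in for bitstr_t are simplifications for illustration, not SLURM's actual API.

#include <stdint.h>
#include <stdio.h>

#define MAX_RECS 16

/* Simplified stand-in for SLURM's node_space_map_t: each record
 * covers [begin_time, end_time) and holds the nodes still available
 * in that window as a bitmask. */
typedef struct {
	long begin_time;
	long end_time;
	uint64_t avail;	/* bit i set => node i is free */
	int next;	/* index of next record; 0 terminates the list */
} node_space_rec;

/* AND together the availability of every window overlapping
 * [start, end), mirroring the node_space[] loop above. */
static uint64_t avail_in_range(node_space_rec *ns, long start, long end)
{
	uint64_t avail = ~0ULL;
	int j = 0;
	while (1) {
		if ((ns[j].end_time > start) && (ns[j].begin_time < end))
			avail &= ns[j].avail;
		if ((j = ns[j].next) == 0)
			break;
	}
	return avail;
}

int main(void)
{
	node_space_rec ns[MAX_RECS] = {
		{   0, 100, 0xFFULL, 1 },	/* all 8 nodes free until t=100 */
		{ 100, 200, 0xF0ULL, 0 },	/* only nodes 4-7 free afterwards */
	};
	/* window spans both records: 0xFF & 0xF0 == 0xF0 */
	printf("avail in [50,150): 0x%llx\n",
	       (unsigned long long)avail_in_range(ns, 50, 150));
	return 0;
}
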
Example #2
int
main (int argc, char *argv[])
{
	int error_code = SLURM_SUCCESS, opt_char;
	log_options_t opts = LOG_OPTS_STDERR_ONLY;
	shares_request_msg_t req_msg;
	shares_response_msg_t *resp_msg = NULL;
	char *temp = NULL;
	int option_index;
	bool all_users = 0;

	static struct option long_options[] = {
		{"accounts", 1, 0, 'A'},
		{"all",      0, 0, 'a'},
		{"long",     0, 0, 'l'},
		{"cluster",  1, 0, 'M'},
		{"clusters", 1, 0, 'M'},
		{"noheader", 0, 0, 'h'},
		{"parsable", 0, 0, 'p'},
		{"parsable2",0, 0, 'P'},
		{"users",    1, 0, 'u'},
		{"verbose",  0, 0, 'v'},
		{"version",  0, 0, 'V'},
		{"help",     0, 0, OPT_LONG_HELP},
		{"usage",    0, 0, OPT_LONG_USAGE},
		{NULL,       0, 0, 0}
	};

	exit_code         = 0;
	long_flag	  = 0;
	quiet_flag        = 0;
	verbosity         = 0;
	memset(&req_msg, 0, sizeof(shares_request_msg_t));
	log_init("sshare", opts, SYSLOG_FACILITY_DAEMON, NULL);

	while((opt_char = getopt_long(argc, argv, "aA:hlM:npPqu:t:vV",
			long_options, &option_index)) != -1) {
		switch (opt_char) {
		case (int)'?':
			fprintf(stderr, "Try \"sshare --help\" "
				"for more information\n");
			exit(1);
			break;
		case 'a':
			all_users = 1;
			break;
		case 'A':
			if(!req_msg.acct_list)
				req_msg.acct_list =
					list_create(slurm_destroy_char);
			slurm_addto_char_list(req_msg.acct_list, optarg);
			break;
		case 'h':
			print_fields_have_header = 0;
			break;
		case 'l':
			long_flag = 1;
			break;
		case 'M':
			if(clusters)
				list_destroy(clusters);
			if(!(clusters =
			     slurmdb_get_info_cluster(optarg))) {
				error("'%s' can't be reached now, "
				      "or it is an invalid entry for "
				      "--cluster.  Use 'sacctmgr --list "
				      "cluster' to see avaliable clusters.",
				      optarg);
				exit(1);
			}
			working_cluster_rec = list_peek(clusters);
			break;
		case 'n':
			print_fields_have_header = 0;
			break;
		case 'p':
			print_fields_parsable_print =
			PRINT_FIELDS_PARSABLE_ENDING;
			break;
		case 'P':
			print_fields_parsable_print =
			PRINT_FIELDS_PARSABLE_NO_ENDING;
			break;
		case 'u':
			if(!strcmp(optarg, "-1")) {
				all_users = 1;
				break;
			}
			all_users = 0;
			if(!req_msg.user_list)
				req_msg.user_list =
					list_create(slurm_destroy_char);
			_addto_name_char_list(req_msg.user_list, optarg, 0);
			break;
		case 'v':
			quiet_flag = -1;
			verbosity++;
			break;
		case 'V':
			_print_version();
			exit(exit_code);
			break;
		case OPT_LONG_HELP:
		case OPT_LONG_USAGE:
			_usage();
			exit(0);
		default:
			exit_code = 1;
			fprintf(stderr, "getopt error, returned %c\n",
				opt_char);
			exit(exit_code);
		}
	}

	if (verbosity) {
		opts.stderr_level += verbosity;
		opts.prefix_level = 1;
		log_alter(opts, 0, NULL);
	}

	if(all_users) {
		if(req_msg.user_list
		   && list_count(req_msg.user_list)) {
			list_destroy(req_msg.user_list);
			req_msg.user_list = NULL;
		}
		if(verbosity)
			fprintf(stderr, "Users requested:\n\t: all\n");
	} else if (verbosity && req_msg.user_list
	    && list_count(req_msg.user_list)) {
		fprintf(stderr, "Users requested:\n");
		ListIterator itr = list_iterator_create(req_msg.user_list);
		while((temp = list_next(itr)))
			fprintf(stderr, "\t: %s\n", temp);
		list_iterator_destroy(itr);
	} else if(!req_msg.user_list || !list_count(req_msg.user_list)) {
		struct passwd *pwd = getpwuid(getuid());
		if(!req_msg.user_list)
			req_msg.user_list = list_create(slurm_destroy_char);
		temp = xstrdup(pwd->pw_name);
		list_append(req_msg.user_list, temp);
		if(verbosity) {
			fprintf(stderr, "Users requested:\n");
			fprintf(stderr, "\t: %s\n", temp);
		}
	}

	if(req_msg.acct_list && list_count(req_msg.acct_list)) {
		fprintf(stderr, "Accounts requested:\n");
		ListIterator itr = list_iterator_create(req_msg.acct_list);
		while((temp = list_next(itr)))
			fprintf(stderr, "\t: %s\n", temp);
		list_iterator_destroy(itr);
	} else {
		if(verbosity)
			fprintf(stderr, "Accounts requested:\n\t: all\n");
	}

	error_code = _get_info(&req_msg, &resp_msg);

	if(req_msg.acct_list)
		list_destroy(req_msg.acct_list);
	if(req_msg.user_list)
		list_destroy(req_msg.user_list);

	if (error_code) {
		slurm_perror("Couldn't get shares from controller");
		exit(error_code);
	}

	/* do stuff with it */
	process(resp_msg);

	slurm_free_shares_response_msg(resp_msg);

	exit(exit_code);
}
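
The command-line handling above is the standard getopt_long(3) loop: a table of struct option entries plus a short-option string whose ':' suffixes mark options that take an argument. A stripped-down, compilable skeleton of the same pattern (with made-up options, not sshare's real set):

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
	int opt, option_index = 0;
	int all_users = 0, long_flag = 0;
	static struct option long_options[] = {
		{"all",   0, 0, 'a'},
		{"long",  0, 0, 'l'},
		{"users", 1, 0, 'u'},	/* 1 => requires an argument */
		{NULL,    0, 0, 0}
	};

	/* "u:" means -u takes an argument, matching the table above */
	while ((opt = getopt_long(argc, argv, "alu:",
				  long_options, &option_index)) != -1) {
		switch (opt) {
		case 'a':
			all_users = 1;
			break;
		case 'l':
			long_flag = 1;
			break;
		case 'u':
			printf("user list: %s\n", optarg);
			break;
		default:
			fprintf(stderr, "Try --help for more information\n");
			exit(1);
		}
	}
	printf("all=%d long=%d\n", all_users, long_flag);
	return 0;
}
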
Example #3
extern void specific_info_block(popup_info_t *popup_win)
{
	int part_error_code = SLURM_SUCCESS;
	int block_error_code = SLURM_SUCCESS;
	static partition_info_msg_t *part_info_ptr = NULL;
	static block_info_msg_t *block_info_ptr = NULL;
	specific_info_t *spec_info = popup_win->spec_info;
	sview_search_info_t *search_info = spec_info->search_info;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	List block_list = NULL;
	List send_block_list = NULL;
	int changed = 1;
	sview_block_info_t *block_ptr = NULL;
	int j=0, i=-1;
	hostset_t hostset = NULL;
	ListIterator itr = NULL;

	if (!spec_info->display_widget) {
		setup_popup_info(popup_win, display_data_block, SORTID_CNT);
	}

	if (spec_info->display_widget && popup_win->toggled) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
		goto display_it;
	}

	if ((part_error_code = get_new_info_part(&part_info_ptr,
						 popup_win->force_refresh))
	    == SLURM_NO_CHANGE_IN_DATA) {
		/* partition data unchanged since the last query */
	} else if (part_error_code != SLURM_SUCCESS) {
		if (spec_info->view == ERROR_VIEW)
			goto end_it;
		spec_info->view = ERROR_VIEW;
		if (spec_info->display_widget)
			gtk_widget_destroy(spec_info->display_widget);
		sprintf(error_char, "slurm_load_partitions: %s",
			slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(popup_win->table,
					  label,
					  0, 1, 0, 1);
		gtk_widget_show(label);
		spec_info->display_widget = gtk_widget_ref(label);
		goto end_it;
	}

	if ((block_error_code =
	     get_new_info_block(&block_info_ptr, popup_win->force_refresh))
	    == SLURM_NO_CHANGE_IN_DATA) {
		if ((!spec_info->display_widget
		     || spec_info->view == ERROR_VIEW)
		    || (part_error_code != SLURM_NO_CHANGE_IN_DATA)) {
			goto display_it;
		}
		changed = 0;

	} else if (block_error_code != SLURM_SUCCESS) {
		if (spec_info->view == ERROR_VIEW)
			goto end_it;
		spec_info->view = ERROR_VIEW;
		if (spec_info->display_widget)
			gtk_widget_destroy(spec_info->display_widget);
		sprintf(error_char, "slurm_load_block: %s",
			slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(popup_win->table,
					  label,
					  0, 1, 0, 1);
		gtk_widget_show(label);
		spec_info->display_widget = gtk_widget_ref(label);
		goto end_it;
	}

display_it:
	block_list = _create_block_list(part_info_ptr, block_info_ptr,
					changed);
	if (!block_list)
		return;

	if (spec_info->view == ERROR_VIEW && spec_info->display_widget) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
	}
	if (spec_info->type != INFO_PAGE && !spec_info->display_widget) {
		tree_view = create_treeview(local_display_data,
					    &popup_win->grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		spec_info->display_widget =
			gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(popup_win->table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, popup_win->display_data,
				 SORTID_CNT, SORTID_BLOCK, SORTID_COLOR);
	}

	setup_popup_grid_list(popup_win);
	spec_info->view = INFO_VIEW;
	if (spec_info->type == INFO_PAGE) {
		_display_info_block(block_list, popup_win);
		goto end_it;
	}

	/* just linking to another list, don't free the inside, just
	   the list */
	send_block_list = list_create(NULL);
	itr = list_iterator_create(block_list);
	i = -1;
	while ((block_ptr = list_next(itr))) {
		/* we want to override any subgrp in error
		   state */
		enum node_states state = NODE_STATE_UNKNOWN;
		char *name = NULL;

		i++;
		switch(spec_info->type) {
		case PART_PAGE:
			if (strcmp(block_ptr->slurm_part_name,
				   search_info->gchar_data))
				continue;
			break;
		case RESV_PAGE:
		case NODE_PAGE:
			if (!block_ptr->mp_str)
				continue;
			if (!(hostset = hostset_create(
				      search_info->gchar_data)))
				continue;
			name = block_ptr->mp_str;
			if (block_ptr->small_block) {
				int j=0;
				/* strip off the ionodes part */
				while (name[j]) {
					if (name[j] == '[') {
						name[j] = '\0';
						break;
					}
					j++;
				}
			}

			if (!hostset_intersects(hostset, name)) {
				hostset_destroy(hostset);
				continue;
			}
			hostset_destroy(hostset);
			break;
		case BLOCK_PAGE:
			switch(search_info->search_type) {
			case SEARCH_BLOCK_NAME:
				if (!search_info->gchar_data)
					continue;

				if (strcmp(block_ptr->bg_block_name,
					   search_info->gchar_data))
					continue;
				break;
			case SEARCH_BLOCK_SIZE:
				if (search_info->int_data == NO_VAL)
					continue;
				if (block_ptr->cnode_cnt
				    != search_info->int_data)
					continue;
				break;
			case SEARCH_BLOCK_STATE:
				if (search_info->int_data == NO_VAL)
					continue;
				if (block_ptr->state != search_info->int_data)
					continue;

				break;
			default:
				continue;
				break;
			}
			break;
		case JOB_PAGE:
			if (strcmp(block_ptr->bg_block_name,
				   search_info->gchar_data))
				continue;
			break;
		default:
			g_print("Unknown type %d\n", spec_info->type);
			continue;
		}
		list_push(send_block_list, block_ptr);

		if (block_ptr->state & BG_BLOCK_ERROR_FLAG)
			state = NODE_STATE_ERROR;
		else if (block_ptr->job_running > NO_JOB_RUNNING)
			state = NODE_STATE_ALLOCATED;
		else
			state = NODE_STATE_IDLE;

		j=0;
		while (block_ptr->bp_inx[j] >= 0) {
			change_grid_color(
				popup_win->grid_button_list,
				block_ptr->bp_inx[j],
				block_ptr->bp_inx[j+1], block_ptr->color_inx,
				true, state);
			j += 2;
		}
	}
	list_iterator_destroy(itr);
	post_setup_popup_grid_list(popup_win);

	_update_info_block(send_block_list,
			   GTK_TREE_VIEW(spec_info->display_widget));
	list_destroy(send_block_list);
end_it:
	popup_win->toggled = 0;
	popup_win->force_refresh = 0;

	return;
}
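
Both loaders above (get_new_info_part and get_new_info_block) follow the same caching idiom: a static pointer keeps the last result, and the loader returns the distinct code SLURM_NO_CHANGE_IN_DATA so the caller can skip rebuilding its widget tree. A minimal sketch of that pattern; the names (get_new_info, MY_NO_CHANGE) are invented stubs, not the sview API:

#include <stdio.h>

#define MY_SUCCESS    0
#define MY_NO_CHANGE  1	/* data identical to the cached copy */

static int data_version = 0;	/* pretend server-side version counter */

/* Refresh *cache only when the underlying data actually changed. */
static int get_new_info(int *cache, int force_refresh)
{
	static int last_version = -1;
	if (!force_refresh && (last_version == data_version))
		return MY_NO_CHANGE;
	last_version = data_version;
	*cache = data_version;	/* stand-in for the real payload */
	return MY_SUCCESS;
}

int main(void)
{
	int cache = 0, rc;
	rc = get_new_info(&cache, 0);	/* first call: fetches */
	printf("rc=%d cache=%d\n", rc, cache);
	rc = get_new_info(&cache, 0);	/* second call: no change */
	printf("rc=%d cache=%d\n", rc, cache);
	data_version++;			/* data changed upstream */
	rc = get_new_info(&cache, 0);	/* fetches again */
	printf("rc=%d cache=%d\n", rc, cache);
	return 0;
}
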
Example #4
File: sprio.c Project: Cray/slurm
int main (int argc, char *argv[])
{
	char *prio_type = NULL;
	int error_code = SLURM_SUCCESS;
	priority_factors_request_msg_t req_msg;
	priority_factors_response_msg_t *resp_msg = NULL;
	log_options_t opts = LOG_OPTS_STDERR_ONLY ;

	slurm_conf_init(NULL);
	log_init(xbasename(argv[0]), opts, SYSLOG_FACILITY_USER, NULL);

	parse_command_line( argc, argv );
	if (params.verbose) {
		opts.stderr_level += params.verbose;
		log_alter(opts, SYSLOG_FACILITY_USER, NULL);
	}

	if (working_cluster_rec) {
		slurm_ctl_conf_info_msg_t  *slurm_ctl_conf_ptr;

		error_code = slurm_load_ctl_conf((time_t) NULL,
						  &slurm_ctl_conf_ptr);
		if (error_code) {
			slurm_perror ("slurm_load_ctl_conf error");
			exit(error_code);
		}
		weight_age  = slurm_ctl_conf_ptr->priority_weight_age;
		weight_fs   = slurm_ctl_conf_ptr->priority_weight_fs;
		weight_js   = slurm_ctl_conf_ptr->priority_weight_js;
		weight_part = slurm_ctl_conf_ptr->priority_weight_part;
		weight_qos  = slurm_ctl_conf_ptr->priority_weight_qos;
		prio_type   = xstrdup(slurm_ctl_conf_ptr->priority_type);
		slurm_free_ctl_conf(slurm_ctl_conf_ptr);
	} else {
		weight_age  = slurm_get_priority_weight_age();
		weight_fs   = slurm_get_priority_weight_fairshare();
		weight_js   = slurm_get_priority_weight_job_size();
		weight_part = slurm_get_priority_weight_partition();
		weight_qos  = slurm_get_priority_weight_qos();
		prio_type   = slurm_get_priority_type();
	}

	/* Check to see if we are running a supported priority plugin */
	if (strncasecmp(prio_type, "priority/multifactor", 20)) {
		fprintf (stderr, "You are not running a supported "
			 "priority plugin\n(%s).\n"
			 "Only 'priority/multifactor' and "
			 "'priority/multifactor2' are supported.\n",
			 prio_type);
		exit(1);
	}
	xfree(prio_type);


	memset(&req_msg, 0, sizeof(priority_factors_request_msg_t));

	if (params.jobs)
		req_msg.job_id_list = params.job_list;
	else
		req_msg.job_id_list = NULL;

	if (params.users)
		req_msg.uid_list = params.user_list;
	else
		req_msg.uid_list = NULL;

	error_code = _get_info(&req_msg, &resp_msg);

	if (error_code) {
		slurm_perror("Couldn't get priority factors from controller");
		exit(error_code);
	}

	if (params.format == NULL) {
		if (params.normalized) {
			if (params.long_list)
				params.format = "%.7i %.8u %10y %10a %10f %10j "
					"%10p %10q";
			else {
				params.format = xstrdup("%.7i");
				if (params.users)
					xstrcat(params.format, " %.8u");
				xstrcat(params.format, " %10y");
				if (weight_age)
					xstrcat(params.format, " %10a");
				if (weight_fs)
					xstrcat(params.format, " %10f");
				if (weight_js)
					xstrcat(params.format, " %10j");
				if (weight_part)
					xstrcat(params.format, " %10p");
				if (weight_qos)
					xstrcat(params.format, " %10q");
			}
		} else {
			if (params.long_list)
				params.format = "%.7i %.8u %.10Y %.10A %.10F "
					"%.10J %.10P %.10Q %.6N";
			else {
				params.format = xstrdup("%.7i");
				if (params.users)
					xstrcat(params.format, " %.8u");
				xstrcat(params.format, " %.10Y");
				if (weight_age)
					xstrcat(params.format, " %.10A");
				if (weight_fs)
					xstrcat(params.format, " %.10F");
				if (weight_js)
					xstrcat(params.format, " %.10J");
				if (weight_part)
					xstrcat(params.format, " %.10P");
				if (weight_qos)
					xstrcat(params.format, " %.10Q");
			}
		}
	}

	/* create the format list from the format */
	parse_format(params.format);

	if (params.jobs && (!resp_msg || !resp_msg->priority_factors_list ||
			    !list_count(resp_msg->priority_factors_list))) {
		printf("Unable to find jobs matching user/id(s) specified\n");
	} else if (resp_msg) {
		print_jobs_array(resp_msg->priority_factors_list,
				 params.format_list);
	}
#if 0
	/* Free storage here if we want to verify that logic.
	 * Since we exit next, this is not important */
 	list_destroy(params.format_list);
	slurm_free_priority_factors_response_msg(resp_msg);
#endif

	exit (error_code);
}
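
The default-format block above appends one output column per nonzero priority weight, so sites that disable a factor never see its column. The same string-building logic in plain libc, with strncat in place of SLURM's auto-growing xstrcat (a sketch, assuming the weights were already fetched):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* pretend these came from slurm_get_priority_weight_*() */
	unsigned weight_age = 1000, weight_fs = 0, weight_qos = 500;
	char format[128] = "%.7i";	/* job id column is always present */

	/* append one column per nonzero weight, as sprio does */
	if (weight_age)
		strncat(format, " %.10A", sizeof(format) - strlen(format) - 1);
	if (weight_fs)
		strncat(format, " %.10F", sizeof(format) - strlen(format) - 1);
	if (weight_qos)
		strncat(format, " %.10Q", sizeof(format) - strlen(format) - 1);

	printf("format: %s\n", format);	/* "%.7i %.10A %.10Q" */
	return 0;
}
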
Example #5
extern int create_full_system_block(List bg_found_block_list)
{
	int rc = SLURM_SUCCESS;
	ListIterator itr;
	bg_record_t *bg_record = NULL;
	char *name = NULL;
	List records = NULL;
	uint16_t geo[SYSTEM_DIMENSIONS];
	int i;
	select_ba_request_t blockreq;
	List results = NULL;
	struct part_record *part_ptr = NULL;
	bitstr_t *bitmap = bit_alloc(node_record_count);
	static int *dims = NULL;
	bool larger = 0;
	char start_char[SYSTEM_DIMENSIONS+1];
	char geo_char[SYSTEM_DIMENSIONS+1];

	if (!dims) {
		dims = select_g_ba_get_dims();
		memset(start_char, 0, sizeof(start_char));
		memset(geo_char, 0, sizeof(geo_char));
	}

	/* Locks are already in place to protect part_list here */
	itr = list_iterator_create(part_list);
	while ((part_ptr = list_next(itr))) {
		/* we only want to use midplanes (mps) that are in
		 * partitions
		 */
		if (!part_ptr->node_bitmap) {
			debug4("Partition %s doesn't have any nodes in it.",
			       part_ptr->name);
			continue;
		}
		bit_or(bitmap, part_ptr->node_bitmap);
	}
	list_iterator_destroy(itr);

	bit_not(bitmap);
	if (bit_ffs(bitmap) != -1) {
		error("We don't have the entire system covered by partitions, "
		      "can't create full system block");
		FREE_NULL_BITMAP(bitmap);
		return SLURM_ERROR;
	}
	FREE_NULL_BITMAP(bitmap);

	/* Here we are adding a block for the entire machine,
	   just in case it isn't in the bluegene.conf file.
	*/
	slurm_mutex_lock(&block_state_mutex);

	for (i=0; i<SYSTEM_DIMENSIONS; i++) {
		geo[i] = dims[i] - 1;
		if (geo[i] > 0)
			larger = 1;
		geo_char[i] = alpha_num[geo[i]];
		start_char[i] = alpha_num[0];
	}

	i = (10+strlen(bg_conf->slurm_node_prefix));
	name = xmalloc(i);

	if (!larger)
		snprintf(name, i, "%s%s",
			 bg_conf->slurm_node_prefix, start_char);
	else
		snprintf(name, i, "%s[%sx%s]",
			 bg_conf->slurm_node_prefix, start_char, geo_char);


	if (bg_found_block_list) {
		itr = list_iterator_create(bg_found_block_list);
		while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) {
			if (!strcmp(name, bg_record->mp_str)) {
				xfree(name);
				list_iterator_destroy(itr);
				/* don't create it; the full-system block already exists */
				goto no_total;
			}
		}
		list_iterator_destroy(itr);
	} else {
		error("create_full_system_block: no bg_found_block_list 2");
	}

	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		while ((bg_record = (bg_record_t *) list_next(itr))
		       != NULL) {
			if (!strcmp(name, bg_record->mp_str)) {
				xfree(name);
				list_iterator_destroy(itr);
				/* don't create it; the full-system block already exists */
				goto no_total;
			}
		}
		list_iterator_destroy(itr);
	} else {
		xfree(name);
		error("create_overlapped_blocks: no bg_lists->main 3");
		rc = SLURM_ERROR;
		goto no_total;
	}

	records = list_create(destroy_bg_record);

	memset(&blockreq, 0, sizeof(select_ba_request_t));
	blockreq.save_name = name;
	for (i=0; i<SYSTEM_DIMENSIONS; i++)
		blockreq.conn_type[i] = SELECT_TORUS;

	add_bg_record(records, NULL, &blockreq, 0 , 0);
	xfree(name);

	bg_record = (bg_record_t *) list_pop(records);
	if (!bg_record) {
		error("Nothing was returned from full system create");
		rc = SLURM_ERROR;
		goto no_total;
	}
	reset_ba_system(false);
	for(i=0; i<SYSTEM_DIMENSIONS; i++) {
		geo_char[i] = alpha_num[bg_record->geo[i]];
		start_char[i] = alpha_num[bg_record->start[i]];
	}
	debug2("adding %s %s %s",  bg_record->mp_str, start_char, geo_char);
	if (bg_record->ba_mp_list)
		list_flush(bg_record->ba_mp_list);
	else
		bg_record->ba_mp_list = list_create(destroy_ba_mp);
#ifdef HAVE_BGQ
	results = list_create(destroy_ba_mp);
#else
	results = list_create(NULL);
#endif
	name = set_bg_block(results,
			    bg_record->start,
			    bg_record->geo,
			    bg_record->conn_type);
	if (!name) {
		error("I was unable to make the full system block.");
		list_destroy(results);
		slurm_mutex_unlock(&block_state_mutex);
		return SLURM_ERROR;
	}
	xfree(name);
	if (bg_record->ba_mp_list)
		list_destroy(bg_record->ba_mp_list);
#ifdef HAVE_BGQ
	bg_record->ba_mp_list = results;
	results = NULL;
#else
	bg_record->ba_mp_list = list_create(destroy_ba_mp);
	copy_node_path(results, &bg_record->ba_mp_list);
	list_destroy(results);
#endif
	if ((rc = bridge_block_create(bg_record)) == SLURM_ERROR) {
		error("create_full_system_block: "
		      "unable to configure block in api");
		destroy_bg_record(bg_record);
		goto no_total;
	}

	print_bg_record(bg_record);
	list_append(bg_lists->main, bg_record);

no_total:
	if (records)
		list_destroy(records);
	slurm_mutex_unlock(&block_state_mutex);
	return rc;
}
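
The coverage test near the top of the function ORs every partition's node bitmap together, inverts the result, and treats any remaining set bit (bit_ffs(bitmap) != -1) as a node outside all partitions. A compact stand-alone version of that check, with uint64_t standing in for bitstr_t (an illustrative simplification, not SLURM's bitmap API):

#include <stdint.h>
#include <stdio.h>

/* OR together every partition's node mask, then make sure no node
 * is left uncovered. Bit i represents node i. */
static int full_system_covered(const uint64_t *part_masks, int nparts,
			       int node_cnt)
{
	uint64_t covered = 0;
	uint64_t all = (node_cnt == 64) ? ~0ULL : ((1ULL << node_cnt) - 1);
	int i;

	for (i = 0; i < nparts; i++)
		covered |= part_masks[i];	/* bit_or() */

	/* bit_not() + bit_ffs(): any bit still set means a gap */
	return ((~covered & all) == 0);
}

int main(void)
{
	uint64_t parts[] = { 0x0FULL, 0x30ULL };	/* nodes 0-3 and 4-5 */
	printf("covered: %d\n", full_system_covered(parts, 2, 8));	/* 0: nodes 6-7 missing */
	parts[1] = 0xF0ULL;				/* nodes 4-7 */
	printf("covered: %d\n", full_system_covered(parts, 2, 8));	/* 1 */
	return 0;
}
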
Example #6
static Cfg *init_bearerbox(Cfg *cfg)
{
    CfgGroup *grp;
    Octstr *log, *val;
    long loglevel;
    int lf, m;
#ifdef HAVE_LIBSSL
    Octstr *ssl_server_cert_file;
    Octstr *ssl_server_key_file;
    int ssl_enabled = 0;
#endif /* HAVE_LIBSSL */

    /* defaults: use localtime and markers for access-log */
    lf = m = 1;
	
    grp = cfg_get_single_group(cfg, octstr_imm("core"));

    log = cfg_get(grp, octstr_imm("log-file"));
    if (log != NULL) {
	if (cfg_get_integer(&loglevel, grp, octstr_imm("log-level")) == -1)
	    loglevel = 0;
	log_open(octstr_get_cstr(log), loglevel, GW_NON_EXCL);
	octstr_destroy(log);
    }

    if (check_config(cfg) == -1)
        panic(0, "Cannot start with corrupted configuration");

    /* determine which timezone we use for access logging */
    if ((log = cfg_get(grp, octstr_imm("access-log-time"))) != NULL) {
        lf = (octstr_case_compare(log, octstr_imm("gmt")) == 0) ? 0 : 1;
        octstr_destroy(log);
    }

    /* should predefined markers be used, i.e., prefixing the timestamp */
    cfg_get_bool(&m, grp, octstr_imm("access-log-clean"));

    /* custom access-log format  */
    if ((log = cfg_get(grp, octstr_imm("access-log-format"))) != NULL) {
        bb_alog_init(log);
        octstr_destroy(log);
    }

    /* open access-log file */
    if ((log = cfg_get(grp, octstr_imm("access-log"))) != NULL) {
        alog_open(octstr_get_cstr(log), lf, m ? 0 : 1);
        octstr_destroy(log);
    }

    log = cfg_get(grp, octstr_imm("store-file"));
    /* initialize the store file */
    if (log != NULL) {
        store_init(log);
        octstr_destroy(log);
    }

    conn_config_ssl (grp);

    /* 
     * Make sure we have "ssl-server-cert-file" and "ssl-server-key-file" specified
     * in the core group since we need them to run SSL-enabled internal box 
     * connections configured via "smsbox-port-ssl = yes" and "wapbox-port-ssl = yes".
     * Check only these, because "admin-port-ssl" and "sendsms-port-ssl" for the 
     * SSL-enabled HTTP servers are probed within gw/bb_http.c:httpadmin_start()
     */
#ifdef HAVE_LIBSSL
    ssl_server_cert_file = cfg_get(grp, octstr_imm("ssl-server-cert-file"));
    ssl_server_key_file = cfg_get(grp, octstr_imm("ssl-server-key-file"));
    if (ssl_server_cert_file != NULL && ssl_server_key_file != NULL) {
       /* we are fine, at least files are specified in the configuration */
    } else {
        cfg_get_bool(&ssl_enabled, grp, octstr_imm("smsbox-port-ssl"));
        cfg_get_bool(&ssl_enabled, grp, octstr_imm("wapbox-port-ssl"));
        if (ssl_enabled) {
	       panic(0, "You MUST specify cert and key files within core group for SSL-enabled inter-box connections!");
        }
    }
    octstr_destroy(ssl_server_cert_file);
    octstr_destroy(ssl_server_key_file);
#endif /* HAVE_LIBSSL */
	
    /* if everything looks OK at first glance, do the real start-up */
    
    outgoing_sms = list_create();
    incoming_sms = list_create();
    outgoing_wdp = list_create();
    incoming_wdp = list_create();

    outgoing_sms_counter = counter_create();
    incoming_sms_counter = counter_create();
    outgoing_wdp_counter = counter_create();
    incoming_wdp_counter = counter_create();
    
    status_mutex = mutex_create();

    setup_signal_handlers();

    
    /* http-admin is REQUIRED */
    httpadmin_start(cfg);

    if (cfg_get_integer(&max_incoming_sms_qlength, grp,
                           octstr_imm("maximum-queue-length")) == -1)
        max_incoming_sms_qlength = -1;
    else {
        warning(0, "Option 'maximum-queue-length' is deprecated! Please use"
                          " 'sms-incoming-queue-limit' instead!");
    }

    if (max_incoming_sms_qlength == -1 &&
        cfg_get_integer(&max_incoming_sms_qlength, grp,
                                  octstr_imm("sms-incoming-queue-limit")) == -1)
        max_incoming_sms_qlength = -1;

#ifndef NO_SMS    
    {
	List *list;
	
	list = cfg_get_multi_group(cfg, octstr_imm("smsc"));
	if (list != NULL) {
	    start_smsc(cfg);
	    list_destroy(list, NULL);
	}
    }
#endif
    
#ifndef NO_WAP
    grp = cfg_get_single_group(cfg, octstr_imm("core"));
    val = cfg_get(grp, octstr_imm("wdp-interface-name"));
    if (val != NULL && octstr_len(val) > 0)
	start_udp(cfg);
    octstr_destroy(val);

    if (cfg_get_single_group(cfg, octstr_imm("wapbox")) != NULL)
	start_wap(cfg);
#endif
    
    return cfg;
}
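
The queue-length lookup near the end is a small deprecation shim: the old key "maximum-queue-length" still works (with a warning), and the new key "sms-incoming-queue-limit" is consulted only when the old one is absent. The same fallback in miniature, with a stub cfg_int() standing in for Kannel's cfg_get_integer (hypothetical helper, not the Kannel API):

#include <stdio.h>
#include <string.h>

/* Stub: returns 0 and fills *val when the key is present in the
 * configuration, -1 otherwise. Only the new key is "set" here. */
static int cfg_int(const char *key, long *val)
{
    if (strcmp(key, "sms-incoming-queue-limit") == 0) {
        *val = 100;
        return 0;
    }
    return -1;
}

int main(void)
{
    long qlen;

    /* prefer the deprecated key so old configs keep working... */
    if (cfg_int("maximum-queue-length", &qlen) == -1)
        qlen = -1;
    else
        fprintf(stderr, "Option 'maximum-queue-length' is deprecated! "
                "Please use 'sms-incoming-queue-limit' instead!\n");

    /* ...and only consult the new key when the old one was absent */
    if (qlen == -1 &&
        cfg_int("sms-incoming-queue-limit", &qlen) == -1)
        qlen = -1;  /* neither set: unlimited */

    printf("queue limit: %ld\n", qlen);  /* 100 */
    return 0;
}
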
Example #7
void __cdecl GGPROTO::dccmainthread(void*)
{
	uin_t uin;
	gg_event *e;
	struct timeval tv;
	fd_set rd, wd;
	int ret;
	SOCKET maxfd;
	DWORD tick;
	list_t l;
	char filename[MAX_PATH];

	// Zero out the lists
	watches = transfers = requests = l = NULL;

	debugLogA("dccmainthread(): started. DCC Server Thread Starting");

	// Read the GG number (uin)
	if (!(uin = getDword(GG_KEY_UIN, 0)))
	{
		debugLogA("dccmainthread(): No Gadu-Gadu number specified. Exiting.");
		if (hEvent) SetEvent(hEvent);
#ifdef DEBUGMODE
		debugLogA("dccmainthread(): end 1.");
#endif
		return;
	}

	// Create listen socket on config direct port
	if (!(dcc = gg_dcc_socket_create(uin, (uint16_t)getWord(GG_KEY_DIRECTPORT, GG_KEYDEF_DIRECTPORT))))
	{
		debugLogA("dccmainthread(): Cannot create DCC listen socket. Exiting.");
		// Signal the main thread that we failed to start
		if (hEvent) SetEvent(hEvent);
#ifdef DEBUGMODE
		debugLogA("dccmainthread(): end 2.");
#endif
		return;
	}

	gg_dcc_port = dcc->port;
	gg_dcc_ip = inet_addr("255.255.255.255");
	debugLogA("dccmainthread(): Listening on port %d.", gg_dcc_port);

	// Signal the main thread that we started
	if (hEvent) SetEvent(hEvent);

	// Add main dcc handler to watches
	list_add(&watches, dcc, 0);

	// Do while we are in the main server thread
	while(pth_dcc.dwThreadId && dcc)
	{
		// Timeouts
		tv.tv_sec = 1;
		tv.tv_usec = 0;

		// Prepare descriptors for select
		FD_ZERO(&rd);
		FD_ZERO(&wd);

		for (maxfd = 0, l = watches; l; l = l->next)
		{
			gg_common *w = (gg_common*)l->data;

			if (!w || w->state == GG_STATE_ERROR || w->state == GG_STATE_IDLE || w->state == GG_STATE_DONE)
				continue;

			// Check that it's a valid descriptor
			if (w->fd == -1) continue;

			if (w->fd > maxfd)
				maxfd = w->fd;
			if ((w->check & GG_CHECK_READ))
				FD_SET(w->fd, &rd);
			if ((w->check & GG_CHECK_WRITE))
				FD_SET(w->fd, &wd);
		}

		// Wait for data on selects
		ret = select(maxfd + 1, &rd, &wd, NULL, &tv);

		// Check for select error
		if (ret == -1)
		{
			if (errno == EBADF)
				debugLogA("dccmainthread(): Bad descriptor on select().");
			else if (errno != EINTR)
				debugLogA("dccmainthread(): Unknown error on select().");
			continue;
		}

		// Process watches (careful with l)
		l = watches;
		gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
		while (l)
		{
			struct gg_common *local_c = (gg_common*)l->data;
			struct gg_dcc *local_dcc = (gg_dcc*)l->data;
			struct gg_dcc7 *local_dcc7 = (gg_dcc7*)l->data;
			l = l->next;

			switch (local_c->type)
			{
				default:
					if (!local_dcc || (!FD_ISSET(local_dcc->fd, &rd) && !FD_ISSET(local_dcc->fd, &wd)))
						continue;

					/////////////////////////////////////////////////////////////////
					// Process DCC events

					// Connection broken/closed
					gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 2, "ft_mutex", 1);
					if (!(e = gg_dcc_socket_watch_fd(local_dcc)))
					{
						debugLogA("dccmainthread(): Socket closed.");
						// Remove socket and _close
						gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
						list_remove(&watches, local_dcc, 0);
						gg_dcc_socket_free(local_dcc);

						// Check if it's main socket
						if (local_dcc == dcc) dcc = NULL;
						continue;
					} else {
						gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
						debugLogA("dccmainthread(): Event: %s", ggdebug_eventtype(e));
					}

					switch(e->type)
					{
						// Client connected
						case GG_EVENT_DCC_NEW:
							list_add(&watches, e->event.dcc_new, 0);
							e->event.dcc_new = NULL;
							break;

						//
						case GG_EVENT_NONE:
							// If transfer in progress do status
							if (local_dcc->file_fd != -1 && local_dcc->offset > 0 && (((tick = GetTickCount()) - local_dcc->tick) > GGSTATREFRESHEVERY))
							{
								PROTOFILETRANSFERSTATUS pfts;
								local_dcc->tick = tick;
								strncpy(filename, local_dcc->folder, sizeof(filename));
								strncat(filename, (char*)local_dcc->file_info.filename, sizeof(filename) - strlen(filename));
								memset(&pfts, 0, sizeof(PROTOFILETRANSFERSTATUS));
								pfts.cbSize = sizeof(PROTOFILETRANSFERSTATUS);
								pfts.hContact = (HANDLE)local_dcc->contact;
								pfts.flags = (local_dcc->type == GG_SESSION_DCC_SEND);
								pfts.pszFiles = NULL;
								pfts.totalFiles = 1;
								pfts.currentFileNumber = 0;
								pfts.totalBytes = local_dcc->file_info.size;
								pfts.totalProgress = local_dcc->offset;
								pfts.szWorkingDir = local_dcc->folder;
								pfts.szCurrentFile = filename;
								pfts.currentFileSize = local_dcc->file_info.size;
								pfts.currentFileProgress = local_dcc->offset;
								pfts.currentFileTime = 0;
								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 3, "ft_mutex", 1);
								ProtoBroadcastAck(local_dcc->contact, ACKTYPE_FILE, ACKRESULT_DATA, local_dcc, (LPARAM)&pfts);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
							}
							break;

						// Connection was successfully ended
						case GG_EVENT_DCC_DONE:
							debugLogA("dccmainthread(): Client: %d, Transfer done ! Closing connection.", dcc->peer_uin);
							// Remove from watches
							list_remove(&watches, local_dcc, 0);
							// Close file & success
							if (local_dcc->file_fd != -1)
							{
								PROTOFILETRANSFERSTATUS pfts;
								strncpy(filename, local_dcc->folder, sizeof(filename));
								strncat(filename, (char*)local_dcc->file_info.filename, sizeof(filename) - strlen(filename));
								memset(&pfts, 0, sizeof(PROTOFILETRANSFERSTATUS));
								pfts.cbSize = sizeof(PROTOFILETRANSFERSTATUS);
								pfts.hContact = (HANDLE)local_dcc->contact;
								pfts.flags = (local_dcc->type == GG_SESSION_DCC_SEND);
								pfts.pszFiles = NULL;
								pfts.totalFiles = 1;
								pfts.currentFileNumber = 0;
								pfts.totalBytes = local_dcc->file_info.size;
								pfts.totalProgress = local_dcc->file_info.size;
								pfts.szWorkingDir = local_dcc->folder;
								pfts.szCurrentFile = filename;
								pfts.currentFileSize = local_dcc->file_info.size;
								pfts.currentFileProgress = local_dcc->file_info.size;
								pfts.currentFileTime = 0;
								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 4, "ft_mutex", 1);
								ProtoBroadcastAck(local_dcc->contact, ACKTYPE_FILE, ACKRESULT_DATA, local_dcc, (LPARAM)&pfts);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
								_close(local_dcc->file_fd); local_dcc->file_fd = -1;
								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 5, "ft_mutex", 1);
								ProtoBroadcastAck(local_dcc->contact, ACKTYPE_FILE, ACKRESULT_SUCCESS, local_dcc, 0);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
							}
							// Free dcc
							gg_free_dcc(local_dcc); if (local_dcc == dcc) dcc = NULL;
							break;

						// Client error
						case GG_EVENT_DCC_ERROR:
							switch (e->event.dcc_error)
							{
								case GG_ERROR_DCC_HANDSHAKE:
									debugLogA("dccmainthread(): Client: %d, Handshake error.", local_dcc->peer_uin);
									break;
								case GG_ERROR_DCC_NET:
									debugLogA("dccmainthread(): Client: %d, Network error.", local_dcc->peer_uin);
									break;
								case GG_ERROR_DCC_FILE:
									debugLogA("dccmainthread(): Client: %d, File read/write error.", local_dcc->peer_uin);
									break;
								case GG_ERROR_DCC_EOF:
									debugLogA("dccmainthread(): Client: %d, End of file/connection error.", local_dcc->peer_uin);
									break;
								case GG_ERROR_DCC_REFUSED:
									debugLogA("dccmainthread(): Client: %d, Connection refused error.", local_dcc->peer_uin);
									break;
								default:
									debugLogA("dccmainthread(): Client: %d, Unknown error.", local_dcc->peer_uin);
							}
							// Don't do anything if it's main socket
							if (local_dcc == dcc) break;

							// Remove from watches
							list_remove(&watches, local_dcc, 0);

							// Close file & fail
							if (local_dcc->contact)
							{
								_close(local_dcc->file_fd); local_dcc->file_fd = -1;
								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 6, "ft_mutex", 1);
								ProtoBroadcastAck(local_dcc->contact, ACKTYPE_FILE, ACKRESULT_FAILED, local_dcc, 0);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
							}
							// Free dcc
							gg_free_dcc(local_dcc); if (local_dcc == dcc) dcc = NULL;
							break;

						// Need file acknowledgement
						case GG_EVENT_DCC_NEED_FILE_ACK:
							debugLogA("dccmainthread(): Client: %d, File ack filename \"%s\" size %d.", local_dcc->peer_uin,
									local_dcc->file_info.filename, local_dcc->file_info.size);
							// Do not watch for transfer until user accept it
							list_remove(&watches, local_dcc, 0);
							// Add to waiting transfers
							list_add(&transfers, local_dcc, 0);

							//////////////////////////////////////////////////
							// Add file recv request
							{
								// Make new ggtransfer struct
								local_dcc->contact = getcontact(local_dcc->peer_uin, 0, 0, NULL);
								TCHAR* filenameT = mir_utf8decodeT((char*)local_dcc->file_info.filename);

								PROTORECVFILET pre = {0};
								pre.flags = PREF_TCHAR;
								pre.fileCount = 1;
								pre.timestamp = time(NULL);
								pre.tszDescription = filenameT;
								pre.ptszFiles = &filenameT;
								pre.lParam = (LPARAM)local_dcc;

								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 7, "ft_mutex", 1);
								ProtoChainRecvFile(local_dcc->contact, &pre);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);

								mir_free(filenameT);
							}
							break;

						// Need client accept
						case GG_EVENT_DCC_CLIENT_ACCEPT:
							debugLogA("dccmainthread(): Client: %d, Client accept.", local_dcc->peer_uin);
							// Check if user is on the list and if it is my uin
							if (getcontact(local_dcc->peer_uin, 0, 0, NULL) &&
								getDword(GG_KEY_UIN, -1) == local_dcc->uin)
								break;

							// Kill unauthorized dcc
							list_remove(&watches, local_dcc, 0);
							gg_free_dcc(local_dcc); if (local_dcc == dcc) dcc = NULL;
							break;

						// Client connected as we wished to (callback)
						case GG_EVENT_DCC_CALLBACK:
						{
							int found = 0;
							debugLogA("dccmainthread(): Callback from client %d.", local_dcc->peer_uin);
							// Seek for stored callback request
							for (l = requests; l; l = l->next)
							{
								struct gg_dcc *req = (gg_dcc*)l->data;

								if (req && req->peer_uin == local_dcc->peer_uin)
								{
									gg_dcc_set_type(local_dcc, GG_SESSION_DCC_SEND);
									found = 1;

									// Copy data req ===> dcc
									local_dcc->folder = req->folder;
									local_dcc->contact = req->contact;
									local_dcc->file_fd = req->file_fd;
									memcpy(&local_dcc->file_info, &req->file_info, sizeof(struct gg_file_info));
									// Copy data back to dcc ===> req
									memcpy(req, local_dcc, sizeof(struct gg_dcc));

									// Remove request
									list_remove(&requests, req, 0);
									// Remove dcc from watches
									list_remove(&watches, local_dcc, 0);
									// Add request to watches
									list_add(&watches, req, 0);
									// Free old data
									gg_free_dcc(local_dcc);
									debugLogA("dccmainthread(): Found stored request to client %d, filename \"%s\" size %d, folder \"%s\".",
										req->peer_uin, req->file_info.filename, req->file_info.size, req->folder);
									break;
								}
							}

							if (!found)
							{
								debugLogA("dccmainthread(): Unknown request to client %d.", local_dcc->peer_uin);
								// Kill unauthorized dcc
								list_remove(&watches, local_dcc, 0);
								gg_free_dcc(local_dcc); if (local_dcc == dcc) dcc = NULL;
							}
							break;
						}
					}

					// Free event
					gg_free_event(e);
					break;

				case GG_SESSION_DCC7_SOCKET:
				case GG_SESSION_DCC7_GET:
				case GG_SESSION_DCC7_SEND:
				case GG_SESSION_DCC7_VOICE:
					if (!local_dcc7 || (!FD_ISSET(local_dcc7->fd, &rd) && !FD_ISSET(local_dcc7->fd, &wd)))
						continue;

					/////////////////////////////////////////////////////////////////
					// Process DCC7 events

					// Connection broken/closed
					gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 8, "ft_mutex", 1);
					if (!(e = gg_dcc7_watch_fd(local_dcc7)))
					{
						debugLogA("dccmainthread(): Socket closed.");
						// Remove socket and _close
						gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
						list_remove(&watches, local_dcc7, 0);
						gg_dcc7_free(local_dcc7);
						continue;
					} else {
						gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
						debugLogA("dccmainthread(): Event: %s", ggdebug_eventtype(e));
					}

					switch(e->type)
					{
						//
						case GG_EVENT_NONE:
							// If transfer in progress do status
							if (local_dcc7->file_fd != -1 && local_dcc7->offset > 0 && (((tick = GetTickCount()) - local_dcc7->tick) > GGSTATREFRESHEVERY))
							{
								PROTOFILETRANSFERSTATUS pfts;
								local_dcc7->tick = tick;
								strncpy(filename, local_dcc7->folder, sizeof(filename));
								strncat(filename, (char*)local_dcc7->filename, sizeof(filename) - strlen(filename));
								memset(&pfts, 0, sizeof(PROTOFILETRANSFERSTATUS));
								pfts.cbSize = sizeof(PROTOFILETRANSFERSTATUS);
								pfts.hContact = (HANDLE)local_dcc7->contact;
								pfts.flags = (local_dcc7->type == GG_SESSION_DCC7_SEND);
								pfts.pszFiles = NULL;
								pfts.totalFiles = 1;
								pfts.currentFileNumber = 0;
								pfts.totalBytes = local_dcc7->size;
								pfts.totalProgress = local_dcc7->offset;
								pfts.szWorkingDir = local_dcc7->folder;
								pfts.szCurrentFile = filename;
								pfts.currentFileSize = local_dcc7->size;
								pfts.currentFileProgress = local_dcc7->offset;
								pfts.currentFileTime = 0;
								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 9, "ft_mutex", 1);
								ProtoBroadcastAck(local_dcc7->contact, ACKTYPE_FILE, ACKRESULT_DATA, local_dcc7, (LPARAM)&pfts);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
							}
							break;

						// Connection was successfully ended
						case GG_EVENT_DCC7_DONE:
							debugLogA("dccmainthread(): Client: %d, Transfer done ! Closing connection.", dcc->peer_uin);
							// Remove from watches
							list_remove(&watches, local_dcc7, 0);
							// Close file & success
							if (local_dcc7->file_fd != -1)
							{
								PROTOFILETRANSFERSTATUS pfts;
								strncpy(filename, local_dcc7->folder, sizeof(filename));
								strncat(filename, (char*)local_dcc7->filename, sizeof(filename) - strlen(filename));
								memset(&pfts, 0, sizeof(PROTOFILETRANSFERSTATUS));
								pfts.cbSize = sizeof(PROTOFILETRANSFERSTATUS);
								pfts.hContact = (HANDLE)local_dcc7->contact;
								pfts.flags = (local_dcc7->type == GG_SESSION_DCC7_SEND);
								pfts.pszFiles = NULL;
								pfts.totalFiles = 1;
								pfts.currentFileNumber = 0;
								pfts.totalBytes = local_dcc7->size;
								pfts.totalProgress = local_dcc7->size;
								pfts.szWorkingDir = local_dcc7->folder;
								pfts.szCurrentFile = filename;
								pfts.currentFileSize = local_dcc7->size;
								pfts.currentFileProgress = local_dcc7->size;
								pfts.currentFileTime = 0;
								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 10, "ft_mutex", 1);
								ProtoBroadcastAck(local_dcc7->contact, ACKTYPE_FILE, ACKRESULT_DATA, local_dcc7, (LPARAM)&pfts);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
								_close(local_dcc7->file_fd); local_dcc7->file_fd = -1;
								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 11, "ft_mutex", 1);
								ProtoBroadcastAck(local_dcc7->contact, ACKTYPE_FILE, ACKRESULT_SUCCESS, local_dcc7, 0);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
							}
							// Free dcc
							gg_dcc7_free(local_dcc7);
							break;

						// Client error
						case GG_EVENT_DCC7_ERROR:
							switch (e->event.dcc7_error)
							{
								case GG_ERROR_DCC7_HANDSHAKE:
									debugLogA("dccmainthread(): Client: %d, Handshake error.", local_dcc7->peer_uin);
									break;
								case GG_ERROR_DCC7_NET:
									debugLogA("dccmainthread(): Client: %d, Network error.", local_dcc7->peer_uin);
									break;
								case GG_ERROR_DCC7_FILE:
									debugLogA("dccmainthread(): Client: %d, File read/write error.", local_dcc7->peer_uin);
									break;
								case GG_ERROR_DCC7_EOF:
									debugLogA("dccmainthread(): Client: %d, End of file/connection error.", local_dcc7->peer_uin);
									break;
								case GG_ERROR_DCC7_REFUSED:
									debugLogA("dccmainthread(): Client: %d, Connection refused error.", local_dcc7->peer_uin);
									break;
								case GG_ERROR_DCC7_RELAY:
									debugLogA("dccmainthread(): Client: %d, Relay connection error.", local_dcc7->peer_uin);
									break;
								default:
									debugLogA("dccmainthread(): Client: %d, Unknown error.", local_dcc7->peer_uin);
							}
							// Remove from watches
							list_remove(&watches, local_dcc7, 0);

							// Close file & fail
							if (local_dcc7->file_fd != -1)
							{
								_close(local_dcc7->file_fd);
								local_dcc7->file_fd = -1;
							}

							if (local_dcc7->contact) {
								gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 12, "ft_mutex", 1);
								ProtoBroadcastAck(local_dcc7->contact, ACKTYPE_FILE, ACKRESULT_FAILED, local_dcc7, 0);
								gg_EnterCriticalSection(&ft_mutex, "dccmainthread", 37, "ft_mutex", 1);
							}

							// Free dcc
							gg_dcc7_free(local_dcc7);
							break;
					}

					// Free event
					gg_free_event(e);
					break;
			}
		}
		gg_LeaveCriticalSection(&ft_mutex, "dccmainthread", 37, 1, "ft_mutex", 1);
	}

	// Close all dcc client sockets
	for (l = watches; l; l = l->next)
	{
		struct gg_common *c = (gg_common*)l->data;
		if (!c) continue;
		if (c->type == GG_SESSION_DCC7_SOCKET || c->type == GG_SESSION_DCC7_SEND || c->type == GG_SESSION_DCC7_GET)
		{
			struct gg_dcc7 *local_dcc7 = (gg_dcc7*)l->data;
			gg_dcc7_free(local_dcc7);
		}
		else
		{
			struct gg_dcc *local_dcc = (gg_dcc*)l->data;
			gg_dcc_socket_free(local_dcc);

			// Check if it's main socket
			if (local_dcc == dcc) dcc = NULL;
		}
	}
	// Close all transfers waiting for acknowledgement
	for (l = transfers; l; l = l->next)
	{
		struct gg_common *c = (gg_common*)l->data;
		if (!c) continue;
		if (c->type == GG_SESSION_DCC7_SOCKET || c->type == GG_SESSION_DCC7_SEND || c->type == GG_SESSION_DCC7_GET)
		{
			struct gg_dcc7 *local_dcc7 = (gg_dcc7*)l->data;
			gg_dcc7_free(local_dcc7);
		}
		else
		{
			struct gg_dcc *local_dcc = (gg_dcc*)l->data;
			gg_dcc_socket_free(local_dcc);
		}
	}
	// Close all waiting dcc requests
	for (l = requests; l; l = l->next)
	{
		struct gg_dcc *local_dcc = (gg_dcc*)l->data;
		if (local_dcc) gg_free_dcc(local_dcc);
	}
	list_destroy(watches, 0);
	list_destroy(transfers, 0);
	list_destroy(requests, 0);

	gg_dcc_port = 0;
	gg_dcc_ip = 0;
	debugLogA("dccmainthread(): end. DCC Server Thread Ending");
}
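
The loop above is a textbook select(2) event loop: rebuild the read/write fd sets on every pass (select modifies them in place), track the highest descriptor for the first argument, and use a one-second timeout so the thread can notice shutdown between events. A minimal stand-alone version watching stdin (POSIX only; illustrative, not the plugin's code):

#include <stdio.h>
#include <sys/select.h>
#include <unistd.h>

int main(void)
{
	int running = 3;	/* iterate a few times, then exit */

	while (running-- > 0) {
		fd_set rd;
		struct timeval tv;
		int maxfd = STDIN_FILENO, ret;

		/* fd sets and timeout must be rebuilt every pass:
		 * select() modifies both in place */
		FD_ZERO(&rd);
		FD_SET(STDIN_FILENO, &rd);
		tv.tv_sec = 1;
		tv.tv_usec = 0;

		ret = select(maxfd + 1, &rd, NULL, NULL, &tv);
		if (ret == -1) {
			perror("select");
			continue;	/* EINTR etc.: just retry */
		}
		if (ret == 0) {
			printf("timeout, no data\n");
			continue;
		}
		if (FD_ISSET(STDIN_FILENO, &rd)) {
			char buf[64];
			ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
			printf("read %zd bytes\n", n);
		}
	}
	return 0;
}
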
Example #8
static int _check_for_booted_overlapping_blocks(
	List block_list, ListIterator bg_record_itr,
	bg_record_t *bg_record, int overlap_check, List overlapped_list,
	uint16_t query_mode)
{
	bg_record_t *found_record = NULL;
	ListIterator itr = NULL;
	int rc = 0;
	int overlap = 0;
	bool is_test = SELECT_IS_TEST(query_mode);

	/* this test is only for actually picking a block, not testing */
	if (is_test && bg_conf->layout_mode == LAYOUT_DYNAMIC)
		return rc;

	/* Make sure no other blocks under this block
	   are booted and running jobs
	*/
	itr = list_iterator_create(block_list);
	while ((found_record = (bg_record_t*)list_next(itr)) != NULL) {
		if ((!found_record->bg_block_id)
		    || (bg_record == found_record)) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
				info("Don't need to look at myself %s %s",
				     bg_record->bg_block_id,
				     found_record->bg_block_id);
			continue;
		}

		slurm_mutex_lock(&block_state_mutex);
		overlap = blocks_overlap(bg_record, found_record);
		slurm_mutex_unlock(&block_state_mutex);

		if (overlap) {
			overlap = 0;
			/* make the available time on this block
			 * (bg_record) the max of this found_record's job
			 * or the one already set if in overlapped_block_list
			 * since we aren't setting job_running we
			 * don't have to remove them since the
			 * block_list should always be destroyed afterwards.
			 */
			if (is_test && overlapped_list
			    && found_record->job_ptr
			    && bg_record->job_running == NO_JOB_RUNNING) {
				ListIterator itr = list_iterator_create(
					overlapped_list);
				bg_record_t *tmp_rec = NULL;

				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK)
					info("found overlapping block %s "
					     "overlapped %s with job %u",
					     found_record->bg_block_id,
					     bg_record->bg_block_id,
					     found_record->job_ptr->job_id);

				while ((tmp_rec = list_next(itr))) {
					if (tmp_rec == bg_record)
						break;
				}
				list_iterator_destroy(itr);
				if (tmp_rec && tmp_rec->job_ptr->end_time
				    < found_record->job_ptr->end_time)
					tmp_rec->job_ptr =
						found_record->job_ptr;
				else if (!tmp_rec) {
					bg_record->job_ptr =
						found_record->job_ptr;
					list_append(overlapped_list,
						    bg_record);
				}
			}
			/* We already know this block doesn't work
			 * right now, so we will see if there is another
			 * overlapping block that ends later.
			 */
			if (rc)
				continue;
			/* This test is here to check if the block we
			 * chose is not booted or if there is a block
			 * overlapping that we could avoid freeing if
			 * we choose something else
			 */
			if (bg_conf->layout_mode == LAYOUT_OVERLAP
			    && ((overlap_check == 0 && bg_record->state
				 != BG_BLOCK_INITED)
				|| (overlap_check == 1 && found_record->state
				    != BG_BLOCK_FREE))) {

				if (!is_test) {
					rc = 1;
					break;
				}
			}

			if (((bg_conf->layout_mode == LAYOUT_DYNAMIC)
			     || ((!SELECT_IS_CHECK_FULL_SET(query_mode)
				  || SELECT_IS_MODE_RUN_NOW(query_mode))
				 && (bg_conf->layout_mode != LAYOUT_DYNAMIC)))
			    && ((found_record->job_running != NO_JOB_RUNNING)
				|| (found_record->state
				    & BG_BLOCK_ERROR_FLAG))) {
				if ((found_record->job_running
				     == BLOCK_ERROR_STATE)
				    || (found_record->state
					& BG_BLOCK_ERROR_FLAG))
					error("can't use %s, "
					      "overlapping block %s "
					      "is in an error state.",
					      bg_record->bg_block_id,
					      found_record->bg_block_id);
				else if (bg_conf->slurm_debug_flags
					 & DEBUG_FLAG_BG_PICK)
					info("can't use %s, there is "
					     "a job (%d) running on "
					     "an overlapping "
					     "block %s",
					     bg_record->bg_block_id,
					     found_record->job_running,
					     found_record->bg_block_id);

				if (bg_conf->layout_mode == LAYOUT_DYNAMIC) {
					List tmp_list = list_create(NULL);
					/* this will remove and
					 * destroy the memory for
					 * bg_record
					 */
					list_remove(bg_record_itr);
					slurm_mutex_lock(&block_state_mutex);

					if (bg_record->original) {
						if (bg_conf->slurm_debug_flags
						    & DEBUG_FLAG_BG_PICK)
							info("This was a "
							     "copy %s",
							     bg_record->
							     bg_block_id);
						found_record =
							bg_record->original;
					} else {
						if (bg_conf->slurm_debug_flags
						    & DEBUG_FLAG_BG_PICK)
							info("looking for "
							     "original");
						found_record =
							find_org_in_bg_list(
								bg_lists->main,
								bg_record);
					}

					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_BG_PICK)
						info("Removing unusable block "
						     "%s from the system.",
						     bg_record->bg_block_id);

					if (!found_record) {
						if (bg_conf->slurm_debug_flags
						    & DEBUG_FLAG_BG_PICK)
							info("This record %s "
							     "wasn't found in "
							     "the "
							     "bg_lists->main, "
							     "no big deal, it "
							     "probably wasn't "
							     "added",
							     bg_record->
							     bg_block_id);
						found_record = bg_record;
					} else
						destroy_bg_record(bg_record);

					list_push(tmp_list, found_record);
					slurm_mutex_unlock(&block_state_mutex);

					/* If a job is running here we
					   need to make sure not to
					   call the regular method,
					   since we are already inside
					   the job write lock.
					*/
					if (found_record->job_ptr
					    && !IS_JOB_FINISHED(
						    found_record->job_ptr)) {
						info("Somehow block %s "
						     "is being freed, but "
						     "appears to already have "
						     "a job %u(%u) running "
						     "on it.",
						     found_record->bg_block_id,
						     found_record->
						     job_ptr->job_id,
						     found_record->job_running);
						/* capture the return code so
						 * slurm_strerror(rc) below
						 * reports the requeue error */
						if ((rc = job_requeue(0,
								found_record->
								job_ptr->job_id,
								-1,
								(uint16_t)
								NO_VAL,
								false))) {
							error("Couldn't "
							      "requeue job %u, "
							      "failing it: %s",
							      found_record->
							      job_ptr->job_id,
							      slurm_strerror(
								      rc));
							job_fail(found_record->
								 job_ptr->
								 job_id);
						}
					}

					free_block_list(NO_VAL, tmp_list, 0, 0);
					list_destroy(tmp_list);
				}
				rc = 1;

				if (!is_test)
					break;
			}
		}
	}
	list_iterator_destroy(itr);

	return rc;
}
Example No. 9
static int _dynamically_request(List block_list, int *blocks_added,
				select_ba_request_t *request,
				char *user_req_nodes,
				uint16_t query_mode)
{
	List list_of_lists = NULL;
	List temp_list = NULL;
	List new_blocks = NULL;
	List job_list = NULL, booted_list = NULL;
	ListIterator itr = NULL;
	int rc = SLURM_ERROR;
	int create_try = 0;

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		info("going to create %d", request->size);
	list_of_lists = list_create(NULL);

	/* If preempt is set and we are checking the full system, it means
	   we altered the block list, so only look at it.
	*/
	if (SELECT_IS_PREEMPT_SET(query_mode)
	    && SELECT_IS_CHECK_FULL_SET(query_mode)) {
		list_append(list_of_lists, block_list);
	} else if (user_req_nodes) {
		slurm_mutex_lock(&block_state_mutex);
		job_list = copy_bg_list(bg_lists->job_running);
		list_append(list_of_lists, job_list);
		slurm_mutex_unlock(&block_state_mutex);
	} else {
		slurm_mutex_lock(&block_state_mutex);
		list_append(list_of_lists, block_list);
		if (list_count(block_list) != list_count(bg_lists->booted)) {
			booted_list = copy_bg_list(bg_lists->booted);
			list_append(list_of_lists, booted_list);
			if (list_count(bg_lists->booted)
			    != list_count(bg_lists->job_running)) {
				job_list = copy_bg_list(bg_lists->job_running);
				list_append(list_of_lists, job_list);
			}
		} else if (list_count(block_list)
			   != list_count(bg_lists->job_running)) {
			job_list = copy_bg_list(bg_lists->job_running);
			list_append(list_of_lists, job_list);
		}
		slurm_mutex_unlock(&block_state_mutex);
	}
	itr = list_iterator_create(list_of_lists);
	while ((temp_list = (List)list_next(itr))) {
		create_try++;

		/* Try, in order:
		   1. the empty space
		   2. creating a block in the unused mps
		   3. creating a block in the mps not running jobs
		*/
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("trying with %d", create_try);
		if ((new_blocks = create_dynamic_block(
			     block_list, request, temp_list, true))) {
			bg_record_t *bg_record = NULL;
			while ((bg_record = list_pop(new_blocks))) {
				if (block_exist_in_list(block_list, bg_record))
					destroy_bg_record(bg_record);
				else if (SELECT_IS_TEST(query_mode)
					 || SELECT_IS_PREEMPT_ON_FULL_TEST(
						 query_mode)) {
					/* Here we don't really want
					   to create the block if we
					   are only testing.

					   The second test makes sure
					   that if we are able to run
					   here but just preempted, we
					   wait a bit so the preempted
					   blocks have time to clear
					   out.
					*/
					list_append(block_list, bg_record);
					(*blocks_added) = 1;
				} else {
					if (bridge_block_create(bg_record)
					    == SLURM_ERROR) {
						destroy_bg_record(bg_record);
						error("_dynamically_request: "
						      "unable to configure "
						      "block");
						rc = SLURM_ERROR;
						break;
					}
					list_append(block_list, bg_record);
					print_bg_record(bg_record);
					(*blocks_added) = 1;
				}
			}
			list_destroy(new_blocks);
			if (!*blocks_added) {
				rc = SLURM_ERROR;
				continue;
			}
			list_sort(block_list,
				  (ListCmpF)bg_record_sort_aval_inc);

			rc = SLURM_SUCCESS;
			break;
		} else if (errno == ESLURM_INTERCONNECT_FAILURE) {
			rc = SLURM_ERROR;
			break;
		}
	}
	list_iterator_destroy(itr);

	if (list_of_lists)
		list_destroy(list_of_lists);
	if (job_list)
		list_destroy(job_list);
	if (booted_list)
		list_destroy(booted_list);

	return rc;
}
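
The function's core idea is a fallback chain: try to create a block against progressively wider candidate pools until one attempt succeeds. Below is a stripped-down, self-contained sketch of just that control flow; the pool names and try_create() are hypothetical, not the Slurm API.

#include <stdio.h>

/* Hypothetical creator: returns 0 on success, -1 on failure. */
static int try_create(const char *pool)
{
	(void) pool;	/* a real version would attempt the allocation here */
	return -1;
}

int main(void)
{
	/* Candidate pools, cheapest first: free space, then unused
	 * midplanes, then midplanes whose jobs are not running. */
	const char *pools[] = { "empty", "unused-mps", "non-job-mps" };
	int i, rc = -1;

	for (i = 0; i < 3 && rc != 0; i++)
		rc = try_create(pools[i]);

	printf("%s\n", rc == 0 ? "created" : "no match");
	return 0;
}

Example No. 10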
int bfs(Graph * graph, BfsVertex * start, List * hops)
{

	Queue queue;

	AdjList *adjlist, *clr_adjlist;

	BfsVertex *clr_vertex, *adj_vertex;

	ListElmt *element, *member;

/*****************************************************************************
*  Initialize all of the vertices in the graph.                              *
*****************************************************************************/

	for (element = list_head(&graph_adjlists(graph)); element != NULL;
	     element = list_next(element)) {

		clr_vertex = ((AdjList *) list_data(element))->vertex;

		if (graph->match(clr_vertex, start)) {

      /***********************************************************************
      *  Initialize the start vertex.                                        *
      ***********************************************************************/

			clr_vertex->color = gray;
			clr_vertex->hops = 0;

		}

		else {

      /***********************************************************************
      *  Initialize vertices other than the start vertex.                    *
      ***********************************************************************/

			clr_vertex->color = white;
			clr_vertex->hops = -1;

		}

	}

/*****************************************************************************
*  Initialize the queue with the adjacency list of the start vertex.         *
*****************************************************************************/

	queue_init(&queue, NULL);

	if (graph_adjlist(graph, start, &clr_adjlist) != 0) {

		queue_destroy(&queue);
		return -1;

	}

	if (queue_enqueue(&queue, clr_adjlist) != 0) {

		queue_destroy(&queue);
		return -1;

	}

/*****************************************************************************
*  Perform breadth-first search.                                             *
*****************************************************************************/

	while (queue_size(&queue) > 0) {

		adjlist = queue_peek(&queue);

   /**************************************************************************
   *  Traverse each vertex in the current adjacency list.                    *
   **************************************************************************/

		for (member = list_head(&adjlist->adjacent); member != NULL;
		     member = list_next(member)) {

			adj_vertex = list_data(member);

      /***********************************************************************
      *  Determine the color of the next adjacent vertex.                    *
      ***********************************************************************/

			if (graph_adjlist(graph, adj_vertex, &clr_adjlist) != 0) {

				queue_destroy(&queue);
				return -1;

			}

			clr_vertex = clr_adjlist->vertex;

      /***********************************************************************
      *  Color each white vertex gray and enqueue its adjacency list.        *
      ***********************************************************************/

			if (clr_vertex->color == white) {

				clr_vertex->color = gray;
				clr_vertex->hops =
				    ((BfsVertex *) adjlist->vertex)->hops + 1;

				if (queue_enqueue(&queue, clr_adjlist) != 0) {

					queue_destroy(&queue);
					return -1;

				}

			}

		}

   /**************************************************************************
   *  Dequeue the current adjacency list and color its vertex black.         *
   **************************************************************************/

		if (queue_dequeue(&queue, (void **)&adjlist) == 0) {

			((BfsVertex *) adjlist->vertex)->color = black;

		}

		else {

			queue_destroy(&queue);
			return -1;

		}

	}

	queue_destroy(&queue);

/*****************************************************************************
*  Pass back the hop count for each vertex in a list.                        *
*****************************************************************************/

	list_init(hops, NULL);

	for (element = list_head(&graph_adjlists(graph)); element != NULL;
	     element = list_next(element)) {

   /**************************************************************************
   *  Skip vertices that were not visited (those with hop counts of -1).     *
   **************************************************************************/

		clr_vertex = ((AdjList *) list_data(element))->vertex;

		if (clr_vertex->hops != -1) {

			if (list_ins_next(hops, list_tail(hops), clr_vertex) !=
			    0) {

				list_destroy(hops);
				return -1;

			}

		}

	}

	return 0;

}
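
A small driver for bfs() might look like the sketch below. It assumes the companion graph and list interfaces from the same library (graph_init(), graph_ins_vertex(), graph_ins_edge(), graph_destroy()) plus <stdio.h>, so treat those signatures, and the BfsVertex layout with a leading void *data member, as assumptions rather than verified API.

/* Hedged usage sketch: vertex payloads are string literals, and match()
 * compares the data pointers; that works because the same BfsVertex
 * instances are reused for insertion, edges, and the start vertex. */
static int match_vertex(const void *key1, const void *key2)
{
	return ((const BfsVertex *) key1)->data ==
	       ((const BfsVertex *) key2)->data;
}

static void bfs_demo(void)
{
	Graph graph;
	List hops;
	ListElmt *e;
	static BfsVertex a = { "a" }, b = { "b" }, c = { "c" };

	graph_init(&graph, match_vertex, NULL);
	graph_ins_vertex(&graph, &a);
	graph_ins_vertex(&graph, &b);
	graph_ins_vertex(&graph, &c);
	graph_ins_edge(&graph, &a, &b);	/* a -> b */
	graph_ins_edge(&graph, &b, &c);	/* b -> c */

	if (bfs(&graph, &a, &hops) == 0) {
		/* print the hop count of every reachable vertex */
		for (e = list_head(&hops); e != NULL; e = list_next(e))
			printf("%s: %d hop(s)\n",
			       (char *) ((BfsVertex *) list_data(e))->data,
			       ((BfsVertex *) list_data(e))->hops);
		list_destroy(&hops);
	}
	graph_destroy(&graph);
}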
Example No. 11
/*
 * Try to find resources for a given job request
 * IN job_ptr - pointer to job record in slurmctld
 * IN/OUT bitmap - nodes available for assignment to job, clear those not to
 *	be used
 * IN min_nodes, max_nodes  - minimum and maximum number of nodes to allocate
 *	to this job (considers slurm block limits)
 * IN mode - SELECT_MODE_RUN_NOW: try to schedule job now
 *           SELECT_MODE_TEST_ONLY: test if job can ever run
 *           SELECT_MODE_WILL_RUN: determine when and where job can run
 * IN preemptee_candidates - List of pointers to jobs which can be preempted.
 * IN/OUT preemptee_job_list - Pointer to list of job pointers. These are the
 *		jobs to be preempted to initiate the pending job. Not set
 *		if mode=SELECT_MODE_TEST_ONLY or input pointer is NULL.
 * RET - SLURM_SUCCESS if job runnable now, error code otherwise
 */
extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap,
		      uint32_t min_nodes, uint32_t max_nodes,
		      uint32_t req_nodes, uint16_t mode,
		      List preemptee_candidates,
		      List *preemptee_job_list)
{
	int rc = SLURM_SUCCESS;
	bg_record_t* bg_record = NULL;
	char buf[256];
	uint16_t conn_type[SYSTEM_DIMENSIONS];
	List block_list = NULL;
	int blocks_added = 0;
	time_t starttime = time(NULL);
	uint16_t local_mode = mode;
	int avail_cpus = num_unused_cpus;
	int dim = 0;

	for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
		conn_type[dim] = (uint16_t)NO_VAL;
	if (preemptee_candidates && preemptee_job_list
	    && list_count(preemptee_candidates))
		local_mode |= SELECT_MODE_PREEMPT_FLAG;
	else
		local_mode |= SELECT_MODE_CHECK_FULL;

	if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
		slurm_mutex_lock(&create_dynamic_mutex);

	slurm_mutex_lock(&block_state_mutex);
	block_list = copy_bg_list(bg_lists->main);
	slurm_mutex_unlock(&block_state_mutex);

	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_CONN_TYPE, &conn_type);
	if (conn_type[0] == SELECT_NAV) {
		if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
			conn_type[0] = SELECT_SMALL;
		else if (min_nodes > 1) {
			for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
				conn_type[dim] = SELECT_TORUS;
		} else if (job_ptr->details->min_cpus < bg_conf->cpus_per_mp)
			conn_type[0] = SELECT_SMALL;
		else {
			for (dim=1; dim<SYSTEM_DIMENSIONS; dim++)
				conn_type[dim] = SELECT_NAV;
		}
		set_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_CONN_TYPE,
				   &conn_type);
	}

	if (slurm_block_bitmap && !bit_set_count(slurm_block_bitmap)) {
		error("no nodes given to place job %u.", job_ptr->job_id);

		if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
			slurm_mutex_unlock(&create_dynamic_mutex);

		return SLURM_ERROR;
	}

	sprint_select_jobinfo(job_ptr->select_jobinfo->data,
			      buf, sizeof(buf),
			      SELECT_PRINT_MIXED);

	debug("bluegene:submit_job: %u mode=%d %s nodes=%u-%u-%u",
	      job_ptr->job_id, local_mode, buf,
	      min_nodes, req_nodes, max_nodes);

#ifdef HAVE_BG_L_P
# ifdef HAVE_BGL
	sprint_select_jobinfo(job_ptr->select_jobinfo->data,
			      buf, sizeof(buf),
			      SELECT_PRINT_BLRTS_IMAGE);
	debug3("BlrtsImage=%s", buf);
# endif
	sprint_select_jobinfo(job_ptr->select_jobinfo->data,
			      buf, sizeof(buf),
			      SELECT_PRINT_LINUX_IMAGE);
# ifdef HAVE_BGL
	debug3("LinuxImage=%s", buf);
# else
	debug3("ComputNodeImage=%s", buf);
# endif
	sprint_select_jobinfo(job_ptr->select_jobinfo->data,
			      buf, sizeof(buf),
			      SELECT_PRINT_RAMDISK_IMAGE);
# ifdef HAVE_BGL
	debug3("RamDiskImage=%s", buf);
# else
	debug3("RamDiskIoLoadImage=%s", buf);
# endif
#endif
	sprint_select_jobinfo(job_ptr->select_jobinfo->data,
			      buf, sizeof(buf),
			      SELECT_PRINT_MLOADER_IMAGE);
	debug3("MloaderImage=%s", buf);

	/* First look at the empty space, and then remove the
	   preemptable jobs and try again. */
	list_sort(block_list, (ListCmpF)bg_record_sort_aval_inc);

	rc = _find_best_block_match(block_list, &blocks_added,
				    job_ptr, slurm_block_bitmap, min_nodes,
				    max_nodes, req_nodes,
				    &bg_record, local_mode, avail_cpus);

	if (rc == SLURM_SUCCESS && SELECT_IS_PREEMPT_SET(local_mode)) {
		ListIterator itr;
		ListIterator job_itr;
		bg_record_t *found_record;
		struct job_record *preempt_job_ptr;

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("doing preemption");
		local_mode |= SELECT_MODE_CHECK_FULL;

		job_itr = list_iterator_create(preemptee_candidates);
		itr = list_iterator_create(block_list);
		while ((preempt_job_ptr = list_next(job_itr))) {
			while ((found_record = list_next(itr))) {
				if (found_record->job_ptr == preempt_job_ptr) {
					/* info("removing job %u running on %s", */
					/*      preempt_job_ptr->job_id, */
					/*      found_record->bg_block_id); */
					found_record->job_ptr = NULL;
					found_record->job_running =
						NO_JOB_RUNNING;
					avail_cpus += found_record->cpu_cnt;
					break;
				}
			}
			if (!found_record) {
				list_iterator_reset(itr);
				error("Job %u wasn't found running anywhere, "
				      "can't preempt",
				      preempt_job_ptr->job_id);
				continue;
			} else if (job_ptr->details->min_cpus > avail_cpus)
				continue;

			list_sort(block_list,
				  (ListCmpF)bg_record_sort_aval_inc);
			if ((rc = _find_best_block_match(
				     block_list, &blocks_added,
				     job_ptr, slurm_block_bitmap,
				     min_nodes, max_nodes, req_nodes,
				     &bg_record, local_mode, avail_cpus))
			    == SLURM_SUCCESS)
				break;

			list_iterator_reset(itr);
		}
		list_iterator_destroy(itr);
		list_iterator_destroy(job_itr);
	}

	if (rc == SLURM_SUCCESS) {
		if (!bg_record)
			fatal("we got a success, but no block back");
		/* Here we see if there is a job running; since
		 * some jobs take a while to finish, we need to
		 * make sure the end time is in the
		 * future.  If it isn't (meaning it is in the
		 * past or at the current time) we add 5 seconds to
		 * it so we don't use the block immediately.
		 */
		if (bg_record->job_ptr
		    && bg_record->job_ptr->end_time) {
			if (bg_record->job_ptr->end_time <= starttime)
				starttime += 5;
			else
				starttime = bg_record->job_ptr->end_time;
		} else if (bg_record->job_running == BLOCK_ERROR_STATE)
			starttime = INFINITE;

		/* make sure the job is eligible to run */
		if (job_ptr->details->begin_time > starttime)
			starttime = job_ptr->details->begin_time;

		job_ptr->start_time = starttime;

		set_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_NODES,
				   bg_record->mp_str);
		set_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_IONODES,
				   bg_record->ionode_str);
		if (!bg_record->bg_block_id) {
			debug("%d can start unassigned job %u "
			      "at %ld on %s",
			      local_mode, job_ptr->job_id,
			      starttime, bg_record->mp_str);

			set_select_jobinfo(job_ptr->select_jobinfo->data,
					   SELECT_JOBDATA_BLOCK_PTR,
					   NULL);
			set_select_jobinfo(job_ptr->select_jobinfo->data,
					   SELECT_JOBDATA_NODE_CNT,
					   &bg_record->cnode_cnt);
		} else {
			if ((bg_record->ionode_str)
			    && (job_ptr->part_ptr->max_share <= 1))
				error("Small block used in "
				      "non-shared partition");

			debug("%d(%d) can start job %u "
			      "at %ld on %s(%s) %d",
			      local_mode, mode, job_ptr->job_id,
			      starttime, bg_record->bg_block_id,
			      bg_record->mp_str,
			      SELECT_IS_MODE_RUN_NOW(local_mode));

			if (SELECT_IS_MODE_RUN_NOW(local_mode)) {
				/* Set this up to be the
				   correct pointer since we
				   probably are working off a
				   copy.
				*/
				if (bg_record->original)
					bg_record = bg_record->original;
				set_select_jobinfo(
					job_ptr->select_jobinfo->data,
					SELECT_JOBDATA_BLOCK_PTR,
					bg_record);
				if (job_ptr) {
					bg_record->job_running =
						job_ptr->job_id;
					bg_record->job_ptr = job_ptr;

					job_ptr->job_state |= JOB_CONFIGURING;
					last_bg_update = time(NULL);
				}
			} else {
				set_select_jobinfo(
					job_ptr->select_jobinfo->data,
					SELECT_JOBDATA_BLOCK_PTR,
					NULL);
				/* Just to make sure we don't
				   end up using this on
				   another job, or we have to
				   wait until preemption is
				   done.
				*/
				bg_record->job_ptr = NULL;
				bg_record->job_running = NO_JOB_RUNNING;
			}

			set_select_jobinfo(job_ptr->select_jobinfo->data,
					   SELECT_JOBDATA_NODE_CNT,
					   &bg_record->cnode_cnt);
		}
		if (SELECT_IS_MODE_RUN_NOW(local_mode))
			_build_select_struct(job_ptr,
					     slurm_block_bitmap,
					     bg_record->cnode_cnt);
		/* set up the preempted job list */
		if (SELECT_IS_PREEMPT_SET(local_mode)) {
			if (*preemptee_job_list)
				list_destroy(*preemptee_job_list);
			*preemptee_job_list = _get_preemptables(
				local_mode, bg_record,
				preemptee_candidates);
		}
		if (!bg_record->bg_block_id) {
			/* This is a fake record so we need to
			 * destroy it after we get the info from
			 * it.  If we were just testing then
			 * we added this record to the
			 * block_list.  If this is the case
			 * it will be handled when we sync the
			 * lists.  But we don't want to do
			 * that so we will set blocks_added to
			 * 0 so it doesn't happen. */
			if (!blocks_added) {
				destroy_bg_record(bg_record);
				bg_record = NULL;
			}
			blocks_added = 0;
		}
		last_job_update = time(NULL);
	}

	if (bg_conf->layout_mode == LAYOUT_DYNAMIC) {
		slurm_mutex_lock(&block_state_mutex);
		if (blocks_added)
			_sync_block_lists(block_list, bg_lists->main);
		slurm_mutex_unlock(&block_state_mutex);
		slurm_mutex_unlock(&create_dynamic_mutex);
	}

	list_destroy(block_list);
	return rc;
}
Example No. 12
void design_defaults_cleanup(void)
{
  if (_g_list) list_destroy(_g_list);
}
Example No. 13
File: proc.c Project: cse451/os161
/*
 * Destroy a proc structure.
 *
 * Note: nothing currently calls this. Your wait/exit code will
 * probably want to do so.
 */
void
proc_destroy(struct proc *proc)
{
	/*
	 * You probably want to destroy and null out much of the
	 * process (particularly the address space) at exit time if
	 * your wait/exit design calls for the process structure to
	 * hang around beyond process exit. Some wait/exit designs
	 * do, some don't.
	 */

	KASSERT(proc != NULL);
	KASSERT(proc != kproc);

	/*
	 * We don't take p_lock in here because we must have the only
	 * reference to this structure. (Otherwise it would be
	 * incorrect to destroy it.)
	 */

	/* VFS fields */
	if (proc->p_cwd) {
		VOP_DECREF(proc->p_cwd);
		proc->p_cwd = NULL;
	}

	/* VM fields */
	if (proc->p_addrspace) {
		/*
		 * If p is the current process, remove it safely from
		 * p_addrspace before destroying it. This makes sure
		 * we don't try to activate the address space while
		 * it's being destroyed.
		 *
		 * Also explicitly deactivate, because setting the
		 * address space to NULL won't necessarily do that.
		 *
		 * (When the address space is NULL, it means the
		 * process is kernel-only; in that case it is normally
		 * ok if the MMU and MMU-related data structures
		 * still refer to the address space of the last
		 * process that had one. Then you save work if that
		 * process is the next one to run, which isn't
		 * uncommon. However, here we're going to destroy the
		 * address space, so we need to make sure that nothing
		 * in VM system still refers to it.)
		 *
		 * The call to as_deactivate() must come after we
		 * clear the address space, or a timer interrupt might
		 * reactivate the old address space again behind our
		 * back.
		 *
		 * If p is not the current process, still remove it
		 * from p_addrspace before destroying it as a
		 * precaution. Note that if p is not the current
		 * process, in order to be here p must either have
		 * never run (e.g. cleaning up after fork failed) or
		 * have finished running and exited. It is quite
		 * incorrect to destroy the proc structure of some
		 * random other process while it's still running...
		 */
		struct addrspace *as;

		if (proc == curproc) {
			as = proc_setas(NULL);
			as_deactivate();
		}
		else {
			as = proc->p_addrspace;
			proc->p_addrspace = NULL;
		}
		as_destroy(as);
	}

	threadarray_cleanup(&proc->p_threads);
	spinlock_cleanup(&proc->p_lock);

	release_process_id(proc->PID);

	/* Child list */
	//proclist_cleanup(&proc->p_childlist);
	list_destroy(proc->p_childlist);
	lock_destroy(proc->p_childlist_lock);
	fd_table_destroy(proc->p_fd_table);

	kfree(proc->p_name);
	kfree(proc);
}
Example No. 14
extern int
pgsql_db_make_table_current(PGconn *pgsql_db, char *schema, char *table_name,
			    storage_field_t *fields)
{
	char *query = NULL, *opt_part = NULL, *temp_char = NULL;
	char *type = NULL;
	int not_null = 0;
	char *default_str = NULL;
	char* original_ptr = NULL;
	int i = 0;
	PGresult *result = NULL;
	List columns = NULL;
	ListIterator itr = NULL;
	char *col = NULL;

	DEF_TIMERS;

	query = xstrdup_printf("select column_name from "
			       "information_schema.columns where "
			       "table_name='%s' and table_schema='%s' ",
			       table_name, schema);

	if (!(result = pgsql_db_query_ret(pgsql_db, query))) {
		xfree(query);
		return SLURM_ERROR;
	}
	xfree(query);
	columns = list_create(slurm_destroy_char);
	for (i = 0; i < PQntuples(result); i++) {
		col = xstrdup(PQgetvalue(result, i, 0)); //column_name
		list_append(columns, col);
	}
	PQclear(result);
	itr = list_iterator_create(columns);
	query = xstrdup_printf("alter table %s.%s", schema, table_name);
	START_TIMER;
	i=0;
	while (fields[i].name) {
		int found = 0;
		not_null = 0;
		if (!strcasecmp("serial", fields[i].options)) {
			i++;
			continue;
		}
		opt_part = xstrdup(fields[i].options);
		original_ptr = opt_part;
		opt_part = strtok_r(opt_part, " ", &temp_char);
		if (opt_part) {
			/* XXX: only one identifier supported */
			type = xstrdup(opt_part);
			opt_part = strtok_r(NULL, " ", &temp_char);
			while (opt_part) {
				if (!strcasecmp("not", opt_part)) {
					opt_part = strtok_r(NULL, " ",
							    &temp_char);
					if (!strcasecmp("null", opt_part)) {
						not_null = 1;

					}
				} else if (!strcasecmp("default", opt_part)){
					opt_part = strtok_r(NULL,
							    " ", &temp_char);
					default_str = xstrdup(opt_part);
				}
				opt_part = strtok_r(NULL,
						    " ", &temp_char);
			}
		} else {
			type = xstrdup(fields[i].options);
		}
		xfree(original_ptr);
		list_iterator_reset(itr);
		while ((col = list_next(itr))) {
			if (!strcmp(col, fields[i].name)) {
				list_delete_item(itr);
				found = 1;
				break;
			}
		}

		temp_char = NULL;
		if (!found) {
			info("adding column %s", fields[i].name);
			if (default_str)
				xstrfmtcat(temp_char,
					   " default %s", default_str);

			if (not_null)
				xstrcat(temp_char, " not null");

			xstrfmtcat(query,
				   " add %s %s",
				   fields[i].name, type);
			if (temp_char)
				xstrcat(query, temp_char);
			xstrcat(query, ",");
		} else {
			if (default_str)
				xstrfmtcat(temp_char,
					   " alter %s set default %s,",
					   fields[i].name, default_str);
			else
				xstrfmtcat(temp_char,
					   " alter %s drop default,",
					   fields[i].name);

			if (not_null)
				xstrfmtcat(temp_char,
					   " alter %s set not null,",
					   fields[i].name);
			else
				xstrfmtcat(temp_char,
					   " alter %s drop not null,",
					   fields[i].name);
			xstrfmtcat(query, " alter %s type %s,%s",
				   fields[i].name, type, temp_char);
		}
		xfree(temp_char);
		xfree(default_str);
		xfree(type);

		i++;
	}
	list_iterator_destroy(itr);
	list_destroy(columns);
	query[strlen(query)-1] = ';';

	//debug4("pgsql db create/alter table:\n %s", query);
	if (pgsql_db_query(pgsql_db, query)) {
		xfree(query);
		return SLURM_ERROR;
	}
	xfree(query);

	END_TIMER2("make table current");
	return SLURM_SUCCESS;
}
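
To make the control flow concrete, here is a hypothetical fields array and the statement the loop above would roughly build for it, assuming storage_field_t is the usual { name, options } pair; the table and column names are illustrative only.

/* Hypothetical input (illustrative names only): */
storage_field_t job_fields[] = {
	{ "id",    "serial" },		/* skipped: serial columns are never altered */
	{ "name",  "text not null" },
	{ "state", "integer default 0" },
	{ NULL,    NULL }
};

/*
 * If table myschema.job_table already has "name" but lacks "state",
 * the built query would look roughly like:
 *
 *   alter table myschema.job_table
 *     alter name type text, alter name drop default,
 *     alter name set not null,
 *     add state integer default 0;
 *
 * with the trailing comma of the last clause rewritten to ';'.
 */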
Example No. 15
void destroy_table(){
	list_destroy(table_des_symboles);
}
Example No. 16
/*
 * finds the best match for a given job request
 *
 * OUT found_bg_record - the matched block, NULL otherwise
 * RET - SLURM_SUCCESS if a match was found, SLURM_ERROR otherwise
 */
static int _find_best_block_match(List block_list,
				  int *blocks_added,
				  struct job_record* job_ptr,
				  bitstr_t* slurm_block_bitmap,
				  uint32_t min_nodes, uint32_t max_nodes,
				  uint32_t req_nodes,
				  bg_record_t** found_bg_record,
				  uint16_t query_mode, int avail_cpus)
{
	bg_record_t *bg_record = NULL;
	uint16_t req_geometry[SYSTEM_DIMENSIONS];
	uint16_t target_size = 0;
	uint32_t req_procs = job_ptr->details->min_cpus;
	select_ba_request_t request;
	int i, dim;
	int overlap_check = 0;
	int allow = 0;
	int check_image = 1;
	uint32_t max_cpus = job_ptr->details->max_cpus;
	char tmp_char[256];
	static int total_cpus = 0;
	int rc = SLURM_SUCCESS;
	int create_try = 0;
	List overlapped_list = NULL;
	bool is_test = SELECT_IS_TEST(query_mode);

	if (!total_cpus) {
		int *cluster_dims = select_g_ba_get_dims();
		total_cpus = 1;
		for (dim=0; dim<SYSTEM_DIMENSIONS; dim++)
			total_cpus *= cluster_dims[dim];
		total_cpus *= bg_conf->cpus_per_mp;
	}

	if (req_nodes > max_nodes) {
		error("can't run this job max mps is %u asking for %u",
		      max_nodes, req_nodes);
		return SLURM_ERROR;
	}

	if (!is_test && (req_procs > avail_cpus)) {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("asking for %u I only have %d",
			     req_procs, avail_cpus);
		return SLURM_ERROR;
	}

	if (!block_list) {
		error("_find_best_block_match: There is no block_list");
		return SLURM_ERROR;
	}

	memset(&request, 0, sizeof(select_ba_request_t));

	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_CONN_TYPE, &request.conn_type);
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_GEOMETRY, &req_geometry);
	get_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_ROTATE, &request.rotate);

	if ((rc = _check_images(job_ptr, &request)) == SLURM_ERROR)
		goto end_it;

	if (req_geometry[0] != 0 && req_geometry[0] != (uint16_t)NO_VAL) {
		char tmp_geo[SYSTEM_DIMENSIONS+1];

		target_size = 1;
		for (i=0; i<SYSTEM_DIMENSIONS; i++) {
			target_size *= req_geometry[i];
			tmp_geo[i] = alpha_num[req_geometry[i]];
		}
		tmp_geo[i] = '\0';

		if (target_size != min_nodes) {
			debug2("min_nodes not set correctly %u "
			       "should be %u from %s",
			       min_nodes, target_size,
			       tmp_geo);
			min_nodes = target_size;
		}
		if (!req_nodes)
			req_nodes = min_nodes;
	} else {
		req_geometry[0] = (uint16_t)NO_VAL;
		target_size = min_nodes;
	}

	*found_bg_record = NULL;
	allow = 0;

	memcpy(request.geometry, req_geometry, sizeof(req_geometry));

	request.deny_pass = (uint16_t)NO_VAL;
	request.save_name = NULL;
	request.size = target_size;
	request.procs = req_procs;
	request.elongate = request.rotate;
	/* request.start[0] = 1; */
	/* request.start[1] = 2; */
	/* request.start[2] = 0; */
	/* request.start[3] = 2; */
	/* request.start_req = 1; */

	if (job_ptr->details->req_node_bitmap)
		request.avail_mp_bitmap = job_ptr->details->req_node_bitmap;
	else
		request.avail_mp_bitmap = slurm_block_bitmap;

	/* since we only look at procs after this and not nodes we
	 *  need to set a max_cpus if given
	 */
	if (max_cpus == (uint32_t)NO_VAL)
		max_cpus = max_nodes * bg_conf->cpus_per_mp;

	while (1) {
		/* Here we are creating a list of all the blocks that
		 * have overlapping jobs, so if we don't find one that
		 * works we can look and see the earliest time
		 * the job can start.  This doesn't apply to Dynamic mode.
		 */
		if (is_test && SELECT_IS_CHECK_FULL_SET(query_mode)
		    && bg_conf->layout_mode != LAYOUT_DYNAMIC)
			overlapped_list = list_create(NULL);

		bg_record = _find_matching_block(block_list,
						 job_ptr,
						 slurm_block_bitmap,
						 &request,
						 max_cpus,
						 &allow, check_image,
						 overlap_check,
						 overlapped_list,
						 query_mode);
		/* this could get altered in _find_matching_block so we
		   need to reset it */
		memcpy(request.geometry, req_geometry, sizeof(req_geometry));

		if (!bg_record && overlapped_list
		    && list_count(overlapped_list)) {
			ListIterator itr =
				list_iterator_create(overlapped_list);
			bg_record_t *tmp_rec = NULL;
			while ((tmp_rec = list_next(itr))) {
				if (!bg_record ||
				    (tmp_rec->job_ptr->end_time <
				     bg_record->job_ptr->end_time))
					bg_record = tmp_rec;
			}
			list_iterator_destroy(itr);
		}

		if (overlapped_list)
			list_destroy(overlapped_list);

		/* set the bitmap and do other allocation activities */
		if (bg_record) {
#ifdef HAVE_BG_L_P
			if (!is_test) {
				if (bridge_block_check_mp_states(
					    bg_record->bg_block_id, 1)
				    != SLURM_SUCCESS) {
					/* check_block_mp_states will
					   set this block in the main
					   list to an error state, but
					   we aren't looking
					   at the main list, so we
					   need to set this copy of
					   the block in an
					   error state as well.
					*/
					bg_record->job_running =
						BLOCK_ERROR_STATE;
					bg_record->state |= BG_BLOCK_ERROR_FLAG;
					error("_find_best_block_match: Picked "
					      "block (%s) had some issues with "
					      "hardware, trying a different "
					      "one.",
					      bg_record->bg_block_id);
					continue;
				}
			}
#endif
			format_node_name(bg_record, tmp_char, sizeof(tmp_char));

			debug("_find_best_block_match %s <%s>",
			      bg_record->bg_block_id, tmp_char);
			bit_and(slurm_block_bitmap, bg_record->mp_bitmap);
			rc = SLURM_SUCCESS;
			*found_bg_record = bg_record;
			goto end_it;
		}

		/* see if we can just reset the image and reboot the block */
		if (allow) {
			check_image = 0;
			allow = 0;
			continue;
		}

		check_image = 1;

		/* all these assume that the *bg_record is NULL */

		if (bg_conf->layout_mode == LAYOUT_OVERLAP
		    && !is_test && overlap_check < 2) {
			overlap_check++;
			continue;
		}

		if (create_try || bg_conf->layout_mode != LAYOUT_DYNAMIC)
			goto no_match;

		if ((rc = _dynamically_request(block_list, blocks_added,
					       &request,
					       job_ptr->details->req_nodes,
					       query_mode))
		    == SLURM_SUCCESS) {
			create_try = 1;
			continue;
		}

		/* Only look at the full system if we aren't going to
		   preempt jobs and look again later.
		*/
		if (is_test && SELECT_IS_CHECK_FULL_SET(query_mode)) {
			List new_blocks = NULL;
			List job_list = list_create(NULL);
			ListIterator itr = NULL;
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
				info("trying with empty machine");

			/* Here we need to make sure the blocks in the
			   job list are those in the block list so go
			   through and grab them and add them to a
			   separate list.
			*/
			itr = list_iterator_create(block_list);
			while ((bg_record = list_next(itr))) {
				if (bg_record->job_running != NO_JOB_RUNNING)
					list_append(job_list, bg_record);
				/* Since the error blocks are at the
				   end we only really need to look at
				   the first one to make sure it will
				   work, so don't add more than one to
				   the job list.
				   We do need to check for at least
				   one error block because that lets
				   us know not to hold up the entire
				   machine for a job that won't run
				   until the error is removed which
				   could be a very long time.
				*/
				if (bg_record->job_running == BLOCK_ERROR_STATE)
					break;
			}
			list_iterator_destroy(itr);

			/* Block list is already in the correct order,
			   earliest available first,
			   so the job list will also be. No need to
			   sort. */
			while (1) {
				bool track_down_nodes = true;

				if ((bg_record = list_pop(job_list))) {
					if (bg_record->job_ptr) {
						if (bg_conf->slurm_debug_flags
						    & DEBUG_FLAG_BG_PICK)
							info("taking off "
							     "%d(%s) started "
							     "at %ld ends "
							     "at %ld",
							     bg_record->
							     job_running,
							     bg_record->
							     bg_block_id,
							     bg_record->
							     job_ptr->
							     start_time,
							     bg_record->
							     job_ptr->
							     end_time);
						/* Mark the block as
						   not running a job,
						   this should
						   correspond to the
						   pointer in the
						   block_list.  We
						   only look at the
						   job_running var so
						   don't remove the
						   job_ptr.
						*/
						bg_record->job_running =
							NO_JOB_RUNNING;
					} else if ((bg_record->job_running
						    == BLOCK_ERROR_STATE)
						   && (bg_conf->
						       slurm_debug_flags
						       & DEBUG_FLAG_BG_PICK))
						info("taking off (%s) "
						     "which is in an "
						     "error state",
						     bg_record->bg_block_id);
				} else
					/* This means we didn't have
					   any jobs to take off
					   anymore so we are making
					   sure we can look at every
					   node on the system.
					*/
					track_down_nodes = false;

				if (!(new_blocks = create_dynamic_block(
					      block_list, &request, job_list,
					      track_down_nodes))) {
					if (errno == ESLURM_INTERCONNECT_FAILURE
					    || !list_count(job_list)) {
						char *nodes;
						if (slurmctld_conf.
						    slurmctld_debug < 5)
							break;
						nodes = bitmap2node_name(
							slurm_block_bitmap);
						debug("job %u not "
						      "runable on %s",
						      job_ptr->job_id,
						      nodes);
						xfree(nodes);
						break;
					}
					continue;
				}
				rc = SLURM_SUCCESS;
				/* outside of the job_test_list this
				 * gets destroyed later, so don't worry
				 * about it now
				 */
				(*found_bg_record) = list_pop(new_blocks);
				if (!(*found_bg_record)) {
					list_destroy(new_blocks);
					if (!bg_record) {
						/* This should never happen */
						error("got an empty list back");
						rc = SLURM_ERROR;
						break;
					}

					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_BG_PICK)
						info("Appears we are trying "
						     "to place this job on "
						     "the block we just "
						     "removed %s.",
						     bg_record->bg_block_id);
					/* This means we placed the job on
					   the block we just popped off.
					*/
					bit_and(slurm_block_bitmap,
						bg_record->mp_bitmap);
					*found_bg_record = bg_record;
					break;
				}
				bit_and(slurm_block_bitmap,
					(*found_bg_record)->mp_bitmap);

				if (bg_record) {
					(*found_bg_record)->job_running =
						bg_record->job_running;
					(*found_bg_record)->job_ptr
						= bg_record->job_ptr;
				}
				list_destroy(new_blocks);
				break;
			}

			list_destroy(job_list);

			goto end_it;
		} else {
			break;
		}
	}

no_match:
	debug("_find_best_block_match none found");
	rc = SLURM_ERROR;

end_it:

	xfree(request.blrtsimage);
	xfree(request.linuximage);
	xfree(request.mloaderimage);
	xfree(request.ramdiskimage);

	return rc;
}
Example No. 17
int
main (int argc, char *argv[])
{
	int error_code = SLURM_SUCCESS, i, opt_char, input_field_count;
	char **input_fields;
	log_options_t opts = LOG_OPTS_STDERR_ONLY ;
	int local_exit_code = 0;
	char *temp = NULL;
	int option_index;
	static struct option long_options[] = {
		{"help",     0, 0, 'h'},
		{"usage",    0, 0, 'h'},
		{"immediate",0, 0, 'i'},
		{"noheader",0, 0, 'n'},
		{"oneliner", 0, 0, 'o'},
		{"parsable", 0, 0, 'p'},
		{"parsable2", 0, 0, 'P'},
		{"quiet",    0, 0, 'Q'},
		{"readonly", 0, 0, 'r'},
		{"associations", 0, 0, 's'},
		{"verbose",  0, 0, 'v'},
		{"version",  0, 0, 'V'},
		{NULL,       0, 0, 0}
	};

	command_name      = argv[0];
	rollback_flag     = 1;
	exit_code         = 0;
	exit_flag         = 0;
	input_field_count = 0;
	quiet_flag        = 0;
	readonly_flag     = 0;
	verbosity         = 0;
	slurm_conf_init(NULL);
	log_init("sacctmgr", opts, SYSLOG_FACILITY_DAEMON, NULL);

	while((opt_char = getopt_long(argc, argv, "hionpPQrsvV",
			long_options, &option_index)) != -1) {
		switch (opt_char) {
		case (int)'?':
			fprintf(stderr, "Try \"sacctmgr --help\" "
				"for more information\n");
			exit(1);
			break;
		case (int)'h':
			_usage ();
			exit(exit_code);
			break;
		case (int)'i':
			rollback_flag = 0;
			break;
		case (int)'o':
			one_liner = 1;
			break;
		case (int)'n':
			print_fields_have_header = 0;
			break;
		case (int)'p':
			print_fields_parsable_print =
			PRINT_FIELDS_PARSABLE_ENDING;
			break;
		case (int)'P':
			print_fields_parsable_print =
			PRINT_FIELDS_PARSABLE_NO_ENDING;
			break;
		case (int)'Q':
			quiet_flag = 1;
			break;
		case (int)'r':
			readonly_flag = 1;
			break;
		case (int)'s':
			with_assoc_flag = 1;
			break;
		case (int)'v':
			quiet_flag = -1;
			verbosity++;
			break;
		case (int)'V':
			_print_version();
			exit(exit_code);
			break;
		default:
			exit_code = 1;
			fprintf(stderr, "getopt error, returned %c\n",
				opt_char);
			exit(exit_code);
		}
	}

	if (argc > MAX_INPUT_FIELDS)	/* bogus input, but continue anyway */
		input_words = argc;
	else
		input_words = 128;
	input_fields = (char **) xmalloc (sizeof (char *) * input_words);
	if (optind < argc) {
		for (i = optind; i < argc; i++) {
			input_fields[input_field_count++] = argv[i];
		}
	}

	if (verbosity) {
		opts.stderr_level += verbosity;
		opts.prefix_level = 1;
		log_alter(opts, 0, NULL);
	}

	/* Check to see if we are running a supported accounting plugin */
	temp = slurm_get_accounting_storage_type();
	if (strcasecmp(temp, "accounting_storage/slurmdbd")
	   && strcasecmp(temp, "accounting_storage/mysql")) {
		fprintf (stderr, "You are not running a supported "
			 "accounting_storage plugin\n(%s).\n"
			 "Only 'accounting_storage/slurmdbd' "
			 "and 'accounting_storage/mysql' are supported.\n",
			temp);
		xfree(temp);
		exit(1);
	}
	xfree(temp);

	errno = 0;
	db_conn = slurmdb_connection_get();
	if (errno != SLURM_SUCCESS) {
		int tmp_errno = errno;
		if ((input_field_count == 2) &&
		   (!strncasecmp(argv[2], "Configuration", strlen(argv[1]))) &&
		   ((!strncasecmp(argv[1], "list", strlen(argv[0]))) ||
		    (!strncasecmp(argv[1], "show", strlen(argv[0]))))) {
			if (tmp_errno == ESLURM_DB_CONNECTION) {
				tmp_errno = 0;
				sacctmgr_list_config(true);
			} else
				sacctmgr_list_config(false);
		}
		errno = tmp_errno;
		if (errno)
			error("Problem talking to the database: %m");
		exit(1);
	}
	my_uid = getuid();

	if (input_field_count)
		exit_flag = 1;
	else
		error_code = _get_command (&input_field_count, input_fields);
	while (error_code == SLURM_SUCCESS) {
		error_code = _process_command (input_field_count,
					       input_fields);
		if (error_code || exit_flag)
			break;
		error_code = _get_command (&input_field_count, input_fields);
		/* This is here so that if someone made a mistake we allow
		 * them to fix it and let the process continue; since there
		 * are checks on the global exit_code we need to reset it.
		 */
		if (exit_code) {
			local_exit_code = exit_code;
			exit_code = 0;
		}
	}
	/* The readline library writes \n when it echoes the input string;
	 * it does not when it sees EOF, so in that case we have to print
	 * one to align the terminal prompt.
	 */
	if (exit_flag == 2)
		putchar('\n');
	if (local_exit_code)
		exit_code = local_exit_code;
	acct_storage_g_close_connection(&db_conn);
	slurm_acct_storage_fini();
	if (g_qos_list)
		list_destroy(g_qos_list);
	if (g_res_list)
		list_destroy(g_res_list);
	exit(exit_code);
}
Example No. 18
int testListDestroy() {
	printf("Módulo list -> teste destrói lista:");
	list_destroy(NULL);
	printf(" passou\n");
	return 1;
}
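
The test above only asserts that destroying a NULL list is a harmless no-op. A generic, self-contained sketch of a destroy routine with that defensive behaviour (not the module under test) might look like this:

#include <stdlib.h>

struct node { struct node *next; void *data; };
struct list { struct node *head; void (*free_data)(void *); };

/* NULL-tolerant destroy: returning early on NULL is exactly the
 * behaviour testListDestroy() exercises. */
static void list_destroy_sketch(struct list *l)
{
	struct node *n, *next;

	if (!l)
		return;
	for (n = l->head; n != NULL; n = next) {
		next = n->next;
		if (l->free_data)
			l->free_data(n->data);	/* free the payload first */
		free(n);
	}
	free(l);
}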
Example No. 19
int main(int argc, char **argv)
{
    int cf_index;
    Cfg *cfg;
    Octstr *filename;

    bb_status = BB_RUNNING;
    
    gwlib_init();
    start_time = time(NULL);

    suspended = list_create();
    isolated = list_create();
    list_add_producer(suspended);
    list_add_producer(isolated);

    cf_index = get_and_set_debugs(argc, argv, check_args);

    if (argv[cf_index] == NULL)
        filename = octstr_create("kannel.conf");
    else
        filename = octstr_create(argv[cf_index]);
    cfg = cfg_create(filename); 
    
    if (cfg_read(cfg) == -1)
        panic(0, "Couldn't read configuration from `%s'.", octstr_get_cstr(filename));
    
    octstr_destroy(filename);

    dlr_init(cfg);
    
    report_versions("bearerbox");

    flow_threads = list_create();
    
    init_bearerbox(cfg);

    info(0, "----------------------------------------");
    info(0, GW_NAME " bearerbox II version %s starting", GW_VERSION);


    gwthread_sleep(5.0); /* give time to threads to register themselves */

    if (store_load() == -1)
	panic(0, "Cannot start with store-file failing");
    
    info(0, "MAIN: Start-up done, entering mainloop");
    if (bb_status == BB_SUSPENDED) {
	info(0, "Gateway is now SUSPENDED by startup arguments");
    } else if (bb_status == BB_ISOLATED) {
	info(0, "Gateway is now ISOLATED by startup arguments");
	list_remove_producer(suspended);
    } else {
	smsc2_resume();
	list_remove_producer(suspended);	
	list_remove_producer(isolated);
    }

    while(bb_status != BB_SHUTDOWN && bb_status != BB_DEAD && list_producer_count(flow_threads) > 0) {
        /* debug("bb", 0, "Main Thread: going to sleep."); */
        /*
         * Not an infinite sleep here, because we should notice
         * when all "flow threads" are dead and shut bearerbox
         * down.
         * XXX if all "flow threads" call gwthread_wakeup(MAIN_THREAD_ID),
         * we could switch to an infinite sleep.
         */
        gwthread_sleep(10.0);
        /* debug("bb", 0, "Main Thread: woken up."); */

        if (bb_todo == 0) {
            continue;
        }

        if (bb_todo & BB_LOGREOPEN) {
            warning(0, "SIGHUP received, catching and re-opening logs");
            log_reopen();
            alog_reopen();
            bb_todo = bb_todo & ~BB_LOGREOPEN;
        }

        if (bb_todo & BB_CHECKLEAKS) {
            warning(0, "SIGQUIT received, reporting memory usage.");
	    gw_check_leaks();
            bb_todo = bb_todo & ~BB_CHECKLEAKS;
        }
    }

    if (bb_status == BB_SHUTDOWN || bb_status == BB_DEAD)
        warning(0, "Killing signal or HTTP admin command received, shutting down...");

    /* call shutdown */
    bb_shutdown();

    /* wait until flow threads exit */
    while(list_consume(flow_threads)!=NULL)
	;

    info(0, "All flow threads have died, killing core");
    bb_status = BB_DEAD;
    httpadmin_stop();

    boxc_cleanup();
    smsc2_cleanup();
    empty_msg_lists();
    list_destroy(flow_threads, NULL);
    list_destroy(suspended, NULL);
    list_destroy(isolated, NULL);
    mutex_destroy(status_mutex);

    alog_close();		/* if we have any */
    bb_alog_shutdown();
    cfg_destroy(cfg);
    dlr_shutdown();
    gwlib_shutdown();

    if (restart == 1)
        execvp(argv[0],argv);

    return 0;
}
Example No. 20
/*
 * pgsql_hourly_rollup - rollup usage data per hour
 */
static int
pgsql_hourly_rollup(pgsql_conn_t *pg_conn, char *cluster,
		    time_t start, time_t end)
{
	int rc = SLURM_SUCCESS, add_sec = 3600;
	time_t now = time(NULL), curr_start = start,
		curr_end = curr_start + add_sec;
	char *query = NULL, *usage_recs = NULL;
	ListIterator a_itr = NULL, c_itr = NULL, w_itr = NULL, r_itr = NULL;
	List assoc_usage_list = list_create(_destroy_local_id_usage);
	List cluster_usage_list = list_create(_destroy_local_cluster_usage);
	List wckey_usage_list = list_create(_destroy_local_id_usage);
	List resv_usage_list = list_create(_destroy_local_resv_usage);
	uint16_t track_wckey = slurm_get_track_wckey();

	a_itr = list_iterator_create(assoc_usage_list);
	c_itr = list_iterator_create(cluster_usage_list);
	w_itr = list_iterator_create(wckey_usage_list);
	r_itr = list_iterator_create(resv_usage_list);
	while(curr_start < end) {
		local_cluster_usage_t *c_usage = NULL;
		local_id_usage_t *a_usage = NULL;
		local_id_usage_t *w_usage = NULL;

		debug3("curr hour is now %ld-%ld", curr_start, curr_end);

		rc = _process_event_usage(pg_conn, cluster, curr_start,
					  curr_end, cluster_usage_list);
		if (rc != SLURM_SUCCESS)
			goto end_it;

		rc = _process_resv_usage(pg_conn, cluster, curr_start,
					 curr_end, cluster_usage_list,
					 resv_usage_list);
		if (rc != SLURM_SUCCESS)
			goto end_it;

		rc = _process_job_usage(pg_conn, cluster, curr_start, curr_end,
					cluster_usage_list, resv_usage_list,
					assoc_usage_list, wckey_usage_list);
		if (rc != SLURM_SUCCESS)
			goto end_it;

		/* now figure out how much more to add to the
		   associations that could have run in the reservation
		*/
		rc = _process_resv_idle_time(resv_usage_list, assoc_usage_list);
		if (rc != SLURM_SUCCESS)
			goto end_it;

		/* Now put the lists into the usage tables */
		list_iterator_reset(c_itr);
		while((c_usage = list_next(c_itr))) {
			_cluster_usage_sanity_check(cluster, c_usage,
						    curr_start,curr_end);
/* 			info("cluster %s(%d) down %d alloc %d " */
/* 			     "resv %d idle %d over %d " */
/* 			     "total= %d = %d from %s", */
/* 			     c_usage->name, */
/* 			     c_usage->cpu_count, c_usage->d_cpu, */
/* 			     c_usage->a_cpu, */
/* 			     c_usage->r_cpu, c_usage->i_cpu, c_usage->o_cpu, */
/* 			     c_usage->d_cpu + c_usage->a_cpu + */
/* 			     c_usage->r_cpu + c_usage->i_cpu, */
/* 			     c_usage->total_time, */
/* 			     ctime(&c_usage->start)); */
/* 			info("to %s", ctime(&c_usage->end)); */
			if (usage_recs)
				xstrcat(usage_recs, ", ");
			xstrfmtcat(usage_recs,
				   "CAST((%ld, %ld, 0, %ld, %d, "
				   "%"PRIu64", %"PRIu64", %"PRIu64", "
				   "%"PRIu64", %"PRIu64", %"PRIu64")"
				   " AS %s.%s)",
				   now, now, curr_start, c_usage->cpu_count,
				   c_usage->a_cpu, c_usage->d_cpu,
				   c_usage->pd_cpu, c_usage->i_cpu,
				   c_usage->o_cpu, c_usage->r_cpu, cluster,
				   cluster_hour_table);
		}
		if (usage_recs) {
			query = xstrdup_printf(
				"SELECT %s.add_cluster_hour_usages(ARRAY[%s]);",
				cluster, usage_recs);
			xfree(usage_recs);
			rc = DEF_QUERY_RET_RC;
			if (rc != SLURM_SUCCESS) {
				error("couldn't add cluster hour rollup");
				goto end_it;
			}
		}

		list_iterator_reset(a_itr);
		while((a_usage = list_next(a_itr))) {
/* 			info("association (%d) %d alloc %d", */
/* 			     a_usage->id, last_id, */
/* 			     a_usage->a_cpu); */
			if (usage_recs)
				xstrcat(usage_recs, ", ");
			xstrfmtcat(usage_recs,
				   "CAST((%ld, %ld, 0, %d, %ld, "
				   "%"PRIu64") AS %s.%s)",
                                   now, now, a_usage->id, curr_start,
				   a_usage->a_cpu, cluster, assoc_hour_table);
		}
		if (usage_recs) {
			query = xstrdup_printf(
				"SELECT %s.add_assoc_hour_usages(ARRAY[%s]);",
				cluster, usage_recs);
			xfree(usage_recs);
			rc = DEF_QUERY_RET_RC;
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add assoc hour rollup");
				goto end_it;
			}
		}

		if (!track_wckey)
			goto end_loop;

		list_iterator_reset(w_itr);
		while((w_usage = list_next(w_itr))) {
/* 			info("association (%d) %d alloc %d", */
/* 			     w_usage->id, last_id, */
/* 			     w_usage->a_cpu); */
			if (usage_recs)
				xstrcat(usage_recs, ", ");
			xstrfmtcat(usage_recs,
				   "CAST((%ld, %ld, 0, %d, %ld, "
				   "%"PRIu64", 0, 0) AS %s.%s)",
				   now, now, w_usage->id, curr_start,
				   w_usage->a_cpu, cluster, wckey_hour_table);
		}
		if (usage_recs) {
			query = xstrdup_printf(
				"SELECT %s.add_wckey_hour_usages(ARRAY[%s]);",
				cluster, usage_recs);
			xfree(usage_recs);
			rc = DEF_QUERY_RET_RC;
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add wckey hour rollup");
				goto end_it;
			}
		}

	end_loop:
		list_flush(assoc_usage_list);
		list_flush(cluster_usage_list);
		list_flush(wckey_usage_list);
		list_flush(resv_usage_list);
		curr_start = curr_end;
		curr_end = curr_start + add_sec;
	}
end_it:
	list_iterator_destroy(a_itr);
	list_iterator_destroy(c_itr);
	list_iterator_destroy(w_itr);
	list_iterator_destroy(r_itr);

	list_destroy(assoc_usage_list);
	list_destroy(cluster_usage_list);
	list_destroy(wckey_usage_list);
	list_destroy(resv_usage_list);

/* 	info("stop start %s", ctime(&curr_start)); */
/* 	info("stop end %s", ctime(&curr_end)); */
	return rc;
}
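
The backbone of the function is the hour-bucket sweep: the window [curr_start, curr_end) advances in 3600-second steps until it passes end, and each iteration rolls up one bucket. A self-contained sketch of just that windowing logic:

#include <stdio.h>
#include <time.h>

/* Sweep hourly buckets over [start, end), as the rollup loop above does. */
static void sweep_hours(time_t start, time_t end)
{
	const int add_sec = 3600;
	time_t curr_start = start;
	time_t curr_end = curr_start + add_sec;

	while (curr_start < end) {
		/* a real rollup would process this bucket here */
		printf("rollup bucket %ld-%ld\n",
		       (long) curr_start, (long) curr_end);
		curr_start = curr_end;
		curr_end = curr_start + add_sec;
	}
}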
Example No. 21
void sequences_exit(void)
{
	list_destroy(sql_seqs);
}
Example No. 22
/*
 * pgsql_monthly_rollup - rollup usage data per month
 */
static int
pgsql_monthly_rollup(pgsql_conn_t *pg_conn, char *cluster, time_t start,
		     time_t end, uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	char *query = NULL;
	slurmdb_archive_cond_t arch_cond;
	slurmdb_job_cond_t job_cond;
	time_t now = time(NULL);
	time_t curr_start = start, curr_end;
	uint16_t track_wckey = slurm_get_track_wckey();

	if (_next_month(&curr_start, &curr_end) != SLURM_SUCCESS) {
		return SLURM_ERROR;
	}
	while(curr_start < end) {
		debug3("curr month is now %ld-%ld", curr_start, curr_end);
		query = xstrdup_printf(
			"SELECT %s.assoc_monthly_rollup(%ld, %ld, %ld);",
			cluster, now, curr_start, curr_end);
		xstrfmtcat(query,
			   "SELECT %s.cluster_monthly_rollup(%ld, %ld, %ld);",
			   cluster, now, curr_start, curr_end);
		if (track_wckey) {
			xstrfmtcat(
				query,
				"SELECT %s.wckey_monthly_rollup(%ld, %ld, %ld);",
				cluster, now, curr_start, curr_end);
		}
		rc = DEF_QUERY_RET_RC;
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add month rollup");
			return SLURM_ERROR;
		}
		curr_start = curr_end;
		if (_next_month(&curr_start, &curr_end) != SLURM_SUCCESS)
			return SLURM_ERROR;
	}

	/* If we didn't ask for archive data, return here and don't do
	   anything extra; just rollup. */

	if (!archive_data)
		return SLURM_SUCCESS;

	if (!slurmdbd_conf)
		return SLURM_SUCCESS;

	memset(&arch_cond, 0, sizeof(arch_cond));
	memset(&job_cond, 0, sizeof(job_cond));
	arch_cond.archive_dir = slurmdbd_conf->archive_dir;
	arch_cond.archive_script = slurmdbd_conf->archive_script;
	arch_cond.purge_event = slurmdbd_conf->purge_event;
	arch_cond.purge_job = slurmdbd_conf->purge_job;
	arch_cond.purge_step = slurmdbd_conf->purge_step;
	arch_cond.purge_suspend = slurmdbd_conf->purge_suspend;

	job_cond.cluster_list = list_create(NULL);
	list_append(job_cond.cluster_list, cluster);
	arch_cond.job_cond = &job_cond;

	rc = js_pg_archive(pg_conn, &arch_cond);

	list_destroy(job_cond.cluster_list);
	
	return rc;
}
Example No. 23
dict_t dict_destroy(dict_t dict) {
    dict->list = list_destroy(dict->list);
    free(dict);
    dict = NULL;
    return (dict);
}
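
Because the function frees everything and then returns the nulled handle, callers can release and clear in a single assignment. A self-contained analogue of that destroy-and-return-NULL idiom (hypothetical types, not the original dict module):

#include <stdlib.h>

typedef struct dict_s {
	void *list;	/* stand-in for the backing list */
} *dict_ptr;

/* Free the backing storage, then the wrapper, and hand back NULL so the
 * caller can write: d = dict_destroy_sketch(d); */
static dict_ptr dict_destroy_sketch(dict_ptr d)
{
	if (d) {
		free(d->list);
		free(d);
	}
	return NULL;
}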
Example No. 24
static uint32_t _get_wckeyid(mysql_conn_t *mysql_conn, char **name,
			     uid_t uid, char *cluster, uint32_t associd)
{
	uint32_t wckeyid = 0;

	if (slurm_get_track_wckey()) {
		/* Here we are looking for the wckeyid if it doesn't
		 * exist we will create one.  We don't need to check
		 * if it is good or not.  Right now this is the only
		 * place things are created. We do this only on a job
		 * start, not on a job submit since we don't want to
		 * slow down getting the db_index back to the
		 * controller.
		 */
		slurmdb_wckey_rec_t wckey_rec;
		char *user = NULL;

		/* since we are unable to rely on uids here (someone could
		   not have their uid in the system yet) we must
		   first get the user name from the associd */
		if (!(user = _get_user_from_associd(
			      mysql_conn, cluster, associd))) {
			error("No user for associd %u", associd);
			goto no_wckeyid;
		}
		/* get the default key */
		if (!*name) {
			slurmdb_user_rec_t user_rec;
			memset(&user_rec, 0, sizeof(slurmdb_user_rec_t));
			user_rec.uid = NO_VAL;
			user_rec.name = user;
			if (assoc_mgr_fill_in_user(mysql_conn, &user_rec,
						   1, NULL) != SLURM_SUCCESS) {
				error("No user by name of %s assoc %u",
				      user, associd);
				xfree(user);
				goto no_wckeyid;
			}

			if (user_rec.default_wckey)
				*name = xstrdup_printf("*%s",
						       user_rec.default_wckey);
			else
				*name = xstrdup_printf("*");
		}

		memset(&wckey_rec, 0, sizeof(slurmdb_wckey_rec_t));
		wckey_rec.name = (*name);
		wckey_rec.uid = NO_VAL;
		wckey_rec.user = user;
		wckey_rec.cluster = cluster;
		if (assoc_mgr_fill_in_wckey(mysql_conn, &wckey_rec,
					    ACCOUNTING_ENFORCE_WCKEYS,
					    NULL) != SLURM_SUCCESS) {
			List wckey_list = NULL;
			slurmdb_wckey_rec_t *wckey_ptr = NULL;

			wckey_list = list_create(slurmdb_destroy_wckey_rec);

			wckey_ptr = xmalloc(sizeof(slurmdb_wckey_rec_t));
			wckey_ptr->name = xstrdup((*name));
			wckey_ptr->user = xstrdup(user);
			wckey_ptr->cluster = xstrdup(cluster);
			list_append(wckey_list, wckey_ptr);
			/* info("adding wckey '%s' '%s' '%s'", */
			/* 	     wckey_ptr->name, wckey_ptr->user, */
			/* 	     wckey_ptr->cluster); */
			/* we have already checked to make
			   sure this was the slurm user before
			   calling this */
			if (as_mysql_add_wckeys(mysql_conn,
						slurm_get_slurm_user_id(),
						wckey_list)
			    == SLURM_SUCCESS)
				acct_storage_p_commit(mysql_conn, 1);
			/* If that worked, fetch it */
			assoc_mgr_fill_in_wckey(mysql_conn, &wckey_rec,
						ACCOUNTING_ENFORCE_WCKEYS,
						NULL);

			list_destroy(wckey_list);
		}
		xfree(user);
		/* info("got wckeyid of %d", wckey_rec.id); */
		wckeyid = wckey_rec.id;
	}
no_wckeyid:
	return wckeyid;
}
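For illustration, a hedged call site (the cluster name and associd below are made up): passing a NULL *name makes the function fall back to the user's default wckey, prefixed with '*' to mark it as a default.

/* Hypothetical caller; values are illustrative only. */
uid_t job_uid = 1000;		/* uid of the job's owner (made up) */
char *wckey_name = NULL;	/* NULL => use the user's default wckey */
uint32_t wckeyid = _get_wckeyid(mysql_conn, &wckey_name,
				job_uid, "snowflake", 42);
/* wckey_name now holds e.g. "*normal" and must be xfree()'d */
xfree(wckey_name);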
Ejemplo n.º 25
0
/*
 * create_defined_blocks - create the static blocks that will be used
 * for scheduling; all partitions must be able to be created and booted
 * at once.
 * IN overlapped - LAYOUT_OVERLAP if blocks may be overlapped,
 * otherwise they are static.
 * IN bg_found_block_list - list of blocks already found on the system.
 * RET - success of fitting all configurations
 */
extern int create_defined_blocks(bg_layout_t overlapped,
				 List bg_found_block_list)
{
	int rc = SLURM_SUCCESS;

	ListIterator itr;
	bg_record_t *bg_record = NULL;
	int i;
	uint16_t geo[SYSTEM_DIMENSIONS];
	char temp[256];
	struct part_record *part_ptr = NULL;
	bitstr_t *usable_mp_bitmap = bit_alloc(node_record_count);

	/* Locks are already in place to protect part_list here */
	itr = list_iterator_create(part_list);
	while ((part_ptr = list_next(itr))) {
		/* we only want to use mps that are in
		 * partitions
		 */
		if (!part_ptr->node_bitmap) {
			debug4("Partition %s doesn't have any nodes in it.",
			       part_ptr->name);
			continue;
		}
		bit_or(usable_mp_bitmap, part_ptr->node_bitmap);
	}
	list_iterator_destroy(itr);

	if (bit_ffs(usable_mp_bitmap) == -1) {
		fatal("We don't have any nodes in any partitions.  "
		      "Can't create blocks.  "
		      "Please check your slurm.conf.");
	}

	slurm_mutex_lock(&block_state_mutex);
	reset_ba_system(false);
	ba_set_removable_mps(usable_mp_bitmap, 1);
	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		while ((bg_record = list_next(itr))) {
			if (bg_record->mp_count > 0
			    && !bg_record->full_block
			    && bg_record->cpu_cnt >= bg_conf->cpus_per_mp) {
				char *name = NULL;
				char start_char[SYSTEM_DIMENSIONS+1];
				char geo_char[SYSTEM_DIMENSIONS+1];

				if (overlapped == LAYOUT_OVERLAP) {
					reset_ba_system(false);
					ba_set_removable_mps(usable_mp_bitmap,
							     1);
				}

				/* we want the mps that aren't
				 * in this record to mark them as used
				 */
				if (ba_set_removable_mps(bg_record->bitmap, 1)
				    != SLURM_SUCCESS)
					fatal("It doesn't seem we have a "
					      "bitmap for %s",
					      bg_record->bg_block_id);

				for (i=0; i<SYSTEM_DIMENSIONS; i++) {
					geo[i] = bg_record->geo[i];
					start_char[i] = alpha_num[
						bg_record->start[i]];
					geo_char[i] = alpha_num[geo[i]];
				}
				start_char[i] = '\0';
				geo_char[i] = '\0';

				debug2("adding %s %s %s",
				       bg_record->mp_str,
				       start_char, geo_char);
				if (bg_record->ba_mp_list
				    && list_count(bg_record->ba_mp_list)) {
					if ((rc = check_and_set_mp_list(
						     bg_record->ba_mp_list))
					    != SLURM_SUCCESS) {
						debug2("something happened in "
						       "the load of %s"
						       "Did you use smap to "
						       "make the "
						       "bluegene.conf file?",
						       bg_record->bg_block_id);
						break;
					}
				} else {
#ifdef HAVE_BGQ
					List results =
						list_create(destroy_ba_mp);
#else
					List results = list_create(NULL);
#endif
					name = set_bg_block(
						results,
						bg_record->start,
						geo,
						bg_record->conn_type);
					ba_reset_all_removed_mps();
					if (!name) {
						error("I was unable to "
						      "make the "
						      "requested block.");
						list_destroy(results);
						rc = SLURM_ERROR;
						break;
					}

					snprintf(temp, sizeof(temp), "%s%s",
						 bg_conf->slurm_node_prefix,
						 name);

					xfree(name);
					if (strcmp(temp, bg_record->mp_str)) {
						fatal("given list of %s "
						      "but allocated %s, "
						      "your order might be "
						      "wrong in bluegene.conf",
						      bg_record->mp_str,
						      temp);
					}
					if (bg_record->ba_mp_list)
						list_destroy(
							bg_record->ba_mp_list);
#ifdef HAVE_BGQ
					bg_record->ba_mp_list = results;
					results = NULL;
#else
					bg_record->ba_mp_list =
						list_create(destroy_ba_mp);
					copy_node_path(results,
						       &bg_record->ba_mp_list);
					list_destroy(results);
#endif
				}
			}
			if (!block_exist_in_list(
				    bg_found_block_list, bg_record)) {
				if (bg_record->full_block) {
					/* if this is defined we need
					   to remove it since we are
					   going to try to create it
					   later on overlap systems
					   this doesn't matter, but
					   since we don't clear the
					   table on static mode we
					   can't do it here or it just
					   won't work since other
					   wires will be or are
					   already set
					*/
					list_remove(itr);
					continue;
				}
				if ((rc = bridge_block_create(bg_record))
				    != SLURM_SUCCESS)
					break;
				print_bg_record(bg_record);
			}
		}
		list_iterator_destroy(itr);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	} else {
		error("create_defined_blocks: no bg_lists->main 2");
		rc = SLURM_ERROR;
		goto end_it;
	}
	slurm_mutex_unlock(&block_state_mutex);
	create_full_system_block(bg_found_block_list);

	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);

end_it:
	ba_reset_all_removed_mps();
	FREE_NULL_BITMAP(usable_mp_bitmap);
	slurm_mutex_unlock(&block_state_mutex);

#ifdef _PRINT_BLOCKS_AND_EXIT
	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		debug("\n\n");
		while ((bg_record = (bg_record_t *) list_next(itr))
		       != NULL) {
			print_bg_record(bg_record);
		}
		list_iterator_destroy(itr);
	} else {
		error("create_defined_blocks: no bg_lists->main 5");
	}
	exit(0);
#endif	/* _PRINT_BLOCKS_AND_EXIT */
	return rc;
}
Ejemplo n.º 26
0
extern List as_mysql_modify_job(mysql_conn_t *mysql_conn, uint32_t uid,
				slurmdb_job_modify_cond_t *job_cond,
				slurmdb_job_rec_t *job)
{
	List ret_list = NULL;
	int rc = SLURM_SUCCESS;
	char *object = NULL;
	char *vals = NULL, *query = NULL, *cond_char = NULL;
	time_t now = time(NULL);
	char *user_name = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;

	if (!job_cond || !job) {
		error("we need something to change");
		return NULL;
	} else if (job_cond->job_id == NO_VAL) {
		errno = SLURM_NO_CHANGE_IN_DATA;
		error("Job ID was not specified for job modification");
		return NULL;
	} else if (!job_cond->cluster) {
		errno = SLURM_NO_CHANGE_IN_DATA;
		error("Cluster was not specified for job modification");
		return NULL;
	} else if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return NULL;

	if (job->derived_ec != NO_VAL)
		xstrfmtcat(vals, ", derived_ec=%u", job->derived_ec);

	if (job->derived_es)
		xstrfmtcat(vals, ", derived_es='%s'", job->derived_es);

	if (!vals) {
		errno = SLURM_NO_CHANGE_IN_DATA;
		error("No change specified for job modification");
		return NULL;
	}

	/* Here we want to get the last job submitted here */
	query = xstrdup_printf("select job_db_inx, id_job, time_submit "
			       "from \"%s_%s\" where deleted=0 "
			       "&& id_job=%u && id_user=%u "
			       "order by time_submit desc limit 1;",
			       job_cond->cluster, job_table,
			       job_cond->job_id, uid);

	debug3("%d(%s:%d) query\n%s",
	       mysql_conn->conn, THIS_FILE, __LINE__, query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(vals);
		xfree(query);
		return NULL;
	}

	if ((row = mysql_fetch_row(result))) {
		char tmp_char[25];
		time_t time_submit = atol(row[2]);
		slurm_make_time_str(&time_submit, tmp_char, sizeof(tmp_char));

		xstrfmtcat(cond_char, "job_db_inx=%s", row[0]);
		object = xstrdup_printf("%s submitted at %s", row[1], tmp_char);

		ret_list = list_create(slurm_destroy_char);
		list_append(ret_list, object);
		mysql_free_result(result);
	} else {
		errno = ESLURM_INVALID_JOB_ID;
		debug3("as_mysql_modify_job: Job not found\n%s", query);
		xfree(vals);
		xfree(query);
		mysql_free_result(result);
		return NULL;
	}
	xfree(query);

	user_name = uid_to_string((uid_t) uid);
	rc = modify_common(mysql_conn, DBD_MODIFY_JOB, now, user_name,
			   job_table, cond_char, vals, job_cond->cluster);
	xfree(user_name);
	xfree(cond_char);
	xfree(vals);
	if (rc == SLURM_ERROR) {
		error("Couldn't modify job");
		list_destroy(ret_list);
		ret_list = NULL;
	}

	return ret_list;
}
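For illustration, with hypothetical inputs (cluster "snowflake", job id 1234, uid 1000), the lookup above expands to a query of the form:

	select job_db_inx, id_job, time_submit from "snowflake_<job_table>"
	where deleted=0 && id_job=1234 && id_user=1000
	order by time_submit desc limit 1;

modify_common() then applies the accumulated vals (for example ", derived_ec=0") to the single row identified by cond_char.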
Ejemplo n.º 27
0
int cvxhull(const List * P, List * polygon)
{

	ListElmt *element;

	Point *min, *low, *p0, *pi, *pc;

	double z, length1, length2;

	int count;

/*****************************************************************************
*  Find the lowest point in the list of points.                              *
*****************************************************************************/

	min = list_data(list_head(P));
	low = min;		/* ensure low is set even if the head is lowest */

	for (element = list_head(P); element != NULL;
	     element = list_next(element)) {

		p0 = list_data(element);

   /**************************************************************************
   *  Keep track of the lowest point thus far.                               *
   **************************************************************************/

		if (p0->y < min->y) {

			min = p0;
			low = list_data(element);

		}

		else {

      /***********************************************************************
      *  If a tie occurs, use the lowest and leftmost point.                 *
      ***********************************************************************/

			if (p0->y == min->y && p0->x < min->x) {

				min = p0;
				low = list_data(element);

			}

		}

	}

/*****************************************************************************
*  Initialize the list for the convex hull.                                  *
*****************************************************************************/

	list_init(polygon, NULL);

/*****************************************************************************
*  Perform Jarvis's march to compute the convex hull.                        *
*****************************************************************************/

	p0 = low;

	do {

   /**************************************************************************
   *  Insert the new p0 into the convex hull.                                *
   **************************************************************************/

		if (list_ins_next(polygon, list_tail(polygon), p0) != 0) {

			list_destroy(polygon);
			return -1;

		}

   /**************************************************************************
   *  Find the point pc that is clockwise from all others.                   *
   **************************************************************************/

		count = 0;

		for (element = list_head(P); element != NULL; element =
		     list_next(element)) {

      /***********************************************************************
      *  Skip p0 in the list of points.                                      *
      ***********************************************************************/

			if ((pi = list_data(element)) == p0)
				continue;

      /***********************************************************************
      *  Count how many points have been explored.                           *
      ***********************************************************************/

			count++;

      /***********************************************************************
      *  Assume the first point to explore is clockwise from all others      *
      *  until proven otherwise.                                             *
      ***********************************************************************/

			if (count == 1) {

				pc = list_data(element);
				continue;

			}

      /***********************************************************************
      *  Determine whether pi is clockwise from pc.                          *
      ***********************************************************************/

			if ((z =
			     ((pi->x - p0->x) * (pc->y - p0->y)) -
			     ((pi->y - p0->y) * (pc->x - p0->x))) > 0) {

	 /********************************************************************
         *  The point pi is clockwise from pc.                               *
         ********************************************************************/

				pc = pi;

			}

			else if (z == 0) {

	 /********************************************************************
         *  If pi and pc are collinear, select the point furthest from p0.   *
         ********************************************************************/

				length1 =
				    sqrt(pow(pi->x - p0->x, 2.0) +
					 pow(pi->y - p0->y, 2.0));
				length2 =
				    sqrt(pow(pc->x - p0->x, 2.0) +
					 pow(pc->y - p0->y, 2.0));

				if (length1 > length2) {

	    /*****************************************************************
            *  The point pi is further from p0 than pc.                      *
            *****************************************************************/

					pc = pi;

				}

			}

		}

   /**************************************************************************
   *  Prepare to find the next point for the convex hull.                    *
   **************************************************************************/

		p0 = pc;

   /**************************************************************************
   *  Continue until reaching the lowest point again.                        *
   **************************************************************************/

	} while (p0 != low);

	return 0;

}
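A hedged driver for the function above, assuming the linked-list API it already uses (list_init, list_ins_next, list_tail, list_destroy) and a Point type with double x and y members; cvxhull() initializes the output list itself, so the caller only initializes the input:

/* Hypothetical driver; Point is assumed to hold double x, y. */
int main(void)
{
	List P, polygon;
	Point pts[5] = { {0, 0}, {4, 0}, {4, 4}, {0, 4}, {2, 2} };
	int i;

	list_init(&P, NULL);
	for (i = 0; i < 5; i++)
		if (list_ins_next(&P, list_tail(&P), &pts[i]) != 0)
			return 1;

	if (cvxhull(&P, &polygon) != 0)
		return 1;

	/* polygon should now hold the four corners of the square; the
	 * interior point (2, 2) is excluded. */
	list_destroy(&polygon);
	list_destroy(&P);
	return 0;
}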
Ejemplo n.º 28
0
t_posicion definirVariable(t_nombre_variable variable) {
    if(status_check() != EXECUTING)
        return ERROR;

    //1) Get the current stack index
    t_stack* actual_stack_index = actual_pcb->stack_index;

    //2) Get the first free position in the stack
    t_posicion actual_stack_pointer = (t_posicion) actual_pcb->stack_pointer;

    //3) Build the required logical address
    logical_addr* direccion_espectante = armar_direccion_logica_variable(actual_stack_pointer, setup->PAGE_SIZE);

    int valor = 0;

    t_list * pedidos = NULL;
    //TODO: CHANGE THIS SO THE REQUEST LIST HOLDS NUMBERS, NOT SPRINTF STRINGS
    obtener_lista_operaciones_escritura(&pedidos, actual_stack_pointer, ANSISOP_VAR_SIZE, valor);

    int index = 0;
    t_nodo_send * nodo = NULL;
    char * umc_response_buffer = calloc(1, sizeof(char));
    if(umc_response_buffer == NULL) {
        log_error(cpu_log, "definirVariable umc_response_buffer mem alloc failed");
        return ERROR;
    }

    char operation;
    int page, offset, size, print_index = 0;
    while (list_size(pedidos) > 0) {
        nodo = list_remove(pedidos, index);
        print_index = 0;    //Reset the read offset for each request buffer
        deserialize_data(&operation, sizeof(char), nodo->data, &print_index);
        deserialize_data(&page, sizeof(int), nodo->data, &print_index);
        deserialize_data(&offset, sizeof(int), nodo->data, &print_index);
        deserialize_data(&size, sizeof(int), nodo->data, &print_index);
        log_info(cpu_log, "Sending request op:%c (%d,%d,%d) to define variable '%c' with value %d", operation, page, offset, size, variable, valor);
        if (send(umcSocketClient, nodo->data, (size_t) nodo->data_length, 0) < 0) {
            log_error(cpu_log, "UMC expected addr send failed");
            free(umc_response_buffer);      //Don't leak on the error path
            free(direccion_espectante);
            return ERROR;
        }

        if (check_umc_response(recv_umc_response_status()) != SUCCESS) {
            free(umc_response_buffer);      //Don't leak on the error path
            free(direccion_espectante);
            return ERROR;
        }

    }

    list_destroy(pedidos); //Free the list structure we allocated

    free(umc_response_buffer);

    //5) If everything went well, add the new t_var to the stack
    t_var * nueva_variable = calloc(1, sizeof(t_var));
    nueva_variable->var_id = variable;
    nueva_variable->page_number = direccion_espectante->page_number;
    nueva_variable->offset = direccion_espectante->offset;
    nueva_variable->tamanio = direccion_espectante->tamanio;

    //6) Update the stack pointer
    actual_pcb->stack_pointer += nueva_variable->tamanio;

    free(direccion_espectante);
    t_stack_entry *last_entry = get_last_entry(actual_stack_index);
    add_var( &last_entry, nueva_variable);

    //7) and return the associated t_posicion
    t_posicion posicion_nueva_variable = get_t_posicion(nueva_variable);
    return posicion_nueva_variable;
}
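The deserialize_data() helper used above is not shown. A minimal sketch consistent with its call sites (copy size bytes out of the buffer at the running offset, then advance the offset); the real helper in this codebase may differ:

#include <string.h>

/* Hypothetical sketch of deserialize_data(): copies size bytes from
 * buffer + *offset into dest and advances *offset. */
static void deserialize_data(void *dest, size_t size,
                             const char *buffer, int *offset) {
    memcpy(dest, buffer + *offset, size);
    *offset += (int) size;
}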
Ejemplo n.º 29
0
static int _cluster_get_jobs(mysql_conn_t *mysql_conn,
			     slurmdb_user_rec_t *user,
			     slurmdb_job_cond_t *job_cond,
			     char *cluster_name,
			     char *job_fields, char *step_fields,
			     char *sent_extra,
			     bool is_admin, int only_pending, List sent_list)
{
	char *query = NULL;
	char *extra = xstrdup(sent_extra);
	uint16_t private_data = slurm_get_private_data();
	slurmdb_selected_step_t *selected_step = NULL;
	MYSQL_RES *result = NULL, *step_result = NULL;
	MYSQL_ROW row, step_row;
	slurmdb_job_rec_t *job = NULL;
	slurmdb_step_rec_t *step = NULL;
	time_t now = time(NULL);
	List job_list = list_create(slurmdb_destroy_job_rec);
	ListIterator itr = NULL;
	List local_cluster_list = NULL;
	int set = 0;
	char *prefix="t2";
	int rc = SLURM_SUCCESS;
	int last_id = -1, curr_id = -1;
	local_cluster_t *curr_cluster = NULL;

	/* This is here to make sure we are looking at only this user
	 * if this flag is set.  We also include any accounts they may be
	 * coordinator of.
	 */
	if (!is_admin && (private_data & PRIVATE_DATA_JOBS)) {
		query = xstrdup_printf("select lft from \"%s_%s\" "
				       "where user='******'",
				       cluster_name, assoc_table, user->name);
		if (user->coord_accts) {
			slurmdb_coord_rec_t *coord = NULL;
			itr = list_iterator_create(user->coord_accts);
			while ((coord = list_next(itr))) {
				xstrfmtcat(query, " || acct='%s'",
					   coord->name);
			}
			list_iterator_destroy(itr);
		}
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		if (!(result = mysql_db_query_ret(
			      mysql_conn, query, 0))) {
			xfree(extra);
			xfree(query);
			rc = SLURM_ERROR;
			goto end_it;
		}
		xfree(query);
		set = 0;
		while ((row = mysql_fetch_row(result))) {
			if (set) {
				xstrfmtcat(extra,
					   " || (%s between %s.lft and %s.rgt)",
					   row[0], prefix, prefix);
			} else {
				set = 1;
				if (extra)
					xstrfmtcat(extra,
						   " && ((%s between %s.lft "
						   "and %s.rgt)",
						   row[0], prefix,
						   prefix);
				else
					xstrfmtcat(extra,
						   " where ((%s between %s.lft "
						   "and %s.rgt)",
						   row[0], prefix,
						   prefix);
			}
		}

		mysql_free_result(result);

		if (set)
			xstrcat(extra, ")");
		else {
			xfree(extra);
			debug("User %s has no assocations, and is not admin, "
			      "so not returning any jobs.", user->name);
			/* This user has no valid associations, so
			 * they will not have any jobs. */
			goto end_it;
		}
	}

	setup_job_cluster_cond_limits(mysql_conn, job_cond,
				      cluster_name, &extra);

	query = xstrdup_printf("select %s from \"%s_%s\" as t1 "
			       "left join \"%s_%s\" as t2 "
			       "on t1.id_assoc=t2.id_assoc",
			       job_fields, cluster_name, job_table,
			       cluster_name, assoc_table);
	if (extra) {
		xstrcat(query, extra);
		xfree(extra);
	}

	/* Order the results so that duplicates are easy to spot and
	   resized jobs are easy to sort.
	*/
	xstrcat(query, " group by id_job, time_submit desc");

	debug3("%d(%s:%d) query\n%s",
	       mysql_conn->conn, THIS_FILE, __LINE__, query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		rc = SLURM_ERROR;
		goto end_it;
	}
	xfree(query);


	/* Set up the environment to check the nodes used by each job.
	   Since we store the bitmap of the entire cluster, we can use
	   that to build a hostlist and set up the bitmap accordingly.
	   This must happen before the conds are set up, since we may
	   update the start/end time.
	*/
	if (job_cond && job_cond->used_nodes) {
		local_cluster_list = setup_cluster_list_with_inx(
			mysql_conn, job_cond, (void **)&curr_cluster);
		if (!local_cluster_list) {
			rc = SLURM_ERROR;
			goto end_it;
		}
	}

	while ((row = mysql_fetch_row(result))) {
		char *id = row[JOB_REQ_ID];
		bool job_ended = 0;
		int submit = slurm_atoul(row[JOB_REQ_SUBMIT]);

		curr_id = slurm_atoul(row[JOB_REQ_JOBID]);

		if (job_cond && !job_cond->duplicates
		    && (curr_id == last_id)
		    && (slurm_atoul(row[JOB_REQ_STATE]) != JOB_RESIZING))
			continue;

		/* check the bitmap to see if this is one of the jobs
		   we are looking for */
		if (!good_nodes_from_inx(local_cluster_list,
					 (void **)&curr_cluster,
					 row[JOB_REQ_NODE_INX], submit)) {
			last_id = curr_id;
			continue;
		}

		job = slurmdb_create_job_rec();
		job->state = slurm_atoul(row[JOB_REQ_STATE]);
		if (curr_id == last_id)
			/* prepend so the list ends up ordered by
			   increasing submit time; the database hands
			   the rows to us in descending order */
			list_prepend(job_list, job);
		else
			list_append(job_list, job);
		last_id = curr_id;

		job->alloc_cpus = slurm_atoul(row[JOB_REQ_ALLOC_CPUS]);
		job->alloc_nodes = slurm_atoul(row[JOB_REQ_ALLOC_NODES]);
		job->associd = slurm_atoul(row[JOB_REQ_ASSOCID]);
		job->resvid = slurm_atoul(row[JOB_REQ_RESVID]);
		job->cluster = xstrdup(cluster_name);

		/* we want a blank wckey if the name is null */
		if (row[JOB_REQ_WCKEY])
			job->wckey = xstrdup(row[JOB_REQ_WCKEY]);
		else
			job->wckey = xstrdup("");
		job->wckeyid = slurm_atoul(row[JOB_REQ_WCKEYID]);

		if (row[JOB_REQ_USER_NAME])
			job->user = xstrdup(row[JOB_REQ_USER_NAME]);
		else
			job->uid = slurm_atoul(row[JOB_REQ_UID]);

		if (row[JOB_REQ_LFT])
			job->lft = slurm_atoul(row[JOB_REQ_LFT]);

		if (row[JOB_REQ_ACCOUNT] && row[JOB_REQ_ACCOUNT][0])
			job->account = xstrdup(row[JOB_REQ_ACCOUNT]);
		else if (row[JOB_REQ_ACCOUNT1] && row[JOB_REQ_ACCOUNT1][0])
			job->account = xstrdup(row[JOB_REQ_ACCOUNT1]);

		if (row[JOB_REQ_BLOCKID])
			job->blockid = xstrdup(row[JOB_REQ_BLOCKID]);

		job->eligible = slurm_atoul(row[JOB_REQ_ELIGIBLE]);
		job->submit = submit;
		job->start = slurm_atoul(row[JOB_REQ_START]);
		job->end = slurm_atoul(row[JOB_REQ_END]);
		job->timelimit = slurm_atoul(row[JOB_REQ_TIMELIMIT]);

		/* job->end may be adjusted below, so note here that
		 * the job has ended */
		if (job->end) {
			job_ended = 1;
			if (!job->start || (job->start > job->end))
				job->start = job->end;
		}

		if (job_cond && !job_cond->without_usage_truncation
		    && job_cond->usage_start) {
			if (job->start && (job->start < job_cond->usage_start))
				job->start = job_cond->usage_start;

			if (!job->end || job->end > job_cond->usage_end)
				job->end = job_cond->usage_end;

			if (!job->start)
				job->start = job->end;

			job->elapsed = job->end - job->start;

			if (row[JOB_REQ_SUSPENDED]) {
				MYSQL_RES *result2 = NULL;
				MYSQL_ROW row2;
				/* get the suspended time for this job */
				query = xstrdup_printf(
					"select time_start, time_end from "
					"\"%s_%s\" where "
					"(time_start < %ld && (time_end >= %ld "
					"|| time_end = 0)) && job_db_inx=%s "
					"order by time_start",
					cluster_name, suspend_table,
					job_cond->usage_end,
					job_cond->usage_start,
					id);

				debug4("%d(%s:%d) query\n%s",
				       mysql_conn->conn, THIS_FILE,
				       __LINE__, query);
				if (!(result2 = mysql_db_query_ret(
					      mysql_conn,
					      query, 0))) {
					xfree(query);
					list_destroy(job_list);
					job_list = NULL;
					rc = SLURM_ERROR;
					break;
				}
				xfree(query);
				while ((row2 = mysql_fetch_row(result2))) {
					time_t local_start =
						slurm_atoul(row2[0]);
					time_t local_end =
						slurm_atoul(row2[1]);

					if (!local_start)
						continue;

					if (job->start > local_start)
						local_start = job->start;
					if (job->end < local_end)
						local_end = job->end;

					if ((local_end - local_start) < 1)
						continue;

					job->elapsed -=
						(local_end - local_start);
					job->suspended +=
						(local_end - local_start);
				}
				mysql_free_result(result2);

			}
		} else {
			job->suspended = slurm_atoul(row[JOB_REQ_SUSPENDED]);

			/* fix the suspended number to be correct */
			if (job->state == JOB_SUSPENDED)
				job->suspended = now - job->suspended;
			if (!job->start) {
				job->elapsed = 0;
			} else if (!job->end) {
				job->elapsed = now - job->start;
			} else {
				job->elapsed = job->end - job->start;
			}

			job->elapsed -= job->suspended;
		}

		if ((int)job->elapsed < 0)
			job->elapsed = 0;

		job->jobid = curr_id;
		job->jobname = xstrdup(row[JOB_REQ_NAME]);
		job->gid = slurm_atoul(row[JOB_REQ_GID]);
		job->exitcode = slurm_atoul(row[JOB_REQ_EXIT_CODE]);
		job->derived_ec = slurm_atoul(row[JOB_REQ_DERIVED_EC]);
		job->derived_es = xstrdup(row[JOB_REQ_DERIVED_ES]);

		if (row[JOB_REQ_PARTITION])
			job->partition = xstrdup(row[JOB_REQ_PARTITION]);

		if (row[JOB_REQ_NODELIST])
			job->nodes = xstrdup(row[JOB_REQ_NODELIST]);

		if (!job->nodes || !strcmp(job->nodes, "(null)")) {
			xfree(job->nodes);
			job->nodes = xstrdup("(unknown)");
		}

		job->track_steps = slurm_atoul(row[JOB_REQ_TRACKSTEPS]);
		job->priority = slurm_atoul(row[JOB_REQ_PRIORITY]);
		job->req_cpus = slurm_atoul(row[JOB_REQ_REQ_CPUS]);
		job->req_mem = slurm_atoul(row[JOB_REQ_REQ_MEM]);
		job->requid = slurm_atoul(row[JOB_REQ_KILL_REQUID]);
		job->qosid = slurm_atoul(row[JOB_REQ_QOS]);
		job->show_full = 1;

		if (only_pending || (job_cond && job_cond->without_steps))
			goto skip_steps;

		if (job_cond && job_cond->step_list
		    && list_count(job_cond->step_list)) {
			set = 0;
			itr = list_iterator_create(job_cond->step_list);
			while ((selected_step = list_next(itr))) {
				if (selected_step->jobid != job->jobid) {
					continue;
				} else if (selected_step->stepid == NO_VAL) {
					job->show_full = 1;
					break;
				} else if (selected_step->stepid == INFINITE)
					selected_step->stepid =
						SLURM_BATCH_SCRIPT;

				if (set)
					xstrcat(extra, " || ");
				else
					xstrcat(extra, " && (");

				/* The stepid could be -2 so use %d not %u */
				xstrfmtcat(extra, "t1.id_step=%d",
					   selected_step->stepid);
				set = 1;
				job->show_full = 0;
			}
			list_iterator_destroy(itr);
			if (set)
				xstrcat(extra, ")");
		}
		query =	xstrdup_printf("select %s from \"%s_%s\" as t1 "
				       "where t1.job_db_inx=%s",
				       step_fields, cluster_name,
				       step_table, id);
		if (extra) {
			xstrcat(query, extra);
			xfree(extra);
		}

		debug4("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);

		if (!(step_result = mysql_db_query_ret(
			      mysql_conn, query, 0))) {
			xfree(query);
			rc = SLURM_ERROR;
			goto end_it;
		}
		xfree(query);

		/* Querying the steps in this fashion was faster than
		   doing only one query and then matching the steps up
		   with the job later.
		*/
		while ((step_row = mysql_fetch_row(step_result))) {
			/* check the bitmap to see if this is one of the steps
			   we are looking for */
			if (!good_nodes_from_inx(local_cluster_list,
						 (void **)&curr_cluster,
						 step_row[STEP_REQ_NODE_INX],
						 submit))
				continue;

			step = slurmdb_create_step_rec();
			step->tot_cpu_sec = 0;
			step->tot_cpu_usec = 0;
			step->job_ptr = job;
			if (!job->first_step_ptr)
				job->first_step_ptr = step;
			list_append(job->steps, step);
			step->stepid = slurm_atoul(step_row[STEP_REQ_STEPID]);
			/* info("got step %u.%u", */
/* 			     job->header.jobnum, step->stepnum); */
			step->state = slurm_atoul(step_row[STEP_REQ_STATE]);
			step->exitcode =
				slurm_atoul(step_row[STEP_REQ_EXIT_CODE]);
			step->ncpus = slurm_atoul(step_row[STEP_REQ_CPUS]);
			step->nnodes = slurm_atoul(step_row[STEP_REQ_NODES]);

			step->ntasks = slurm_atoul(step_row[STEP_REQ_TASKS]);
			step->task_dist =
				slurm_atoul(step_row[STEP_REQ_TASKDIST]);
			if (!step->ntasks)
				step->ntasks = step->ncpus;

			step->start = slurm_atoul(step_row[STEP_REQ_START]);

			step->end = slurm_atoul(step_row[STEP_REQ_END]);
			/* if the job has ended end the step also */
			if (!step->end && job_ended) {
				step->end = job->end;
				step->state = job->state;
			}

			if (job_cond && !job_cond->without_usage_truncation
			    && job_cond->usage_start) {
				if (step->start
				    && (step->start < job_cond->usage_start))
					step->start = job_cond->usage_start;

				if (!step->start && step->end)
					step->start = step->end;

				if (!step->end
				    || (step->end > job_cond->usage_end))
					step->end = job_cond->usage_end;
			}

			/* figure this out by start stop */
			step->suspended =
				slurm_atoul(step_row[STEP_REQ_SUSPENDED]);
			if (!step->start) {
				step->elapsed = 0;
			} else if (!step->end) {
				step->elapsed = now - step->start;
			} else {
				step->elapsed = step->end - step->start;
			}
			step->elapsed -= step->suspended;

			if ((int)step->elapsed < 0)
				step->elapsed = 0;

			step->user_cpu_sec =
				slurm_atoul(step_row[STEP_REQ_USER_SEC]);
			step->user_cpu_usec =
				slurm_atoul(step_row[STEP_REQ_USER_USEC]);
			step->sys_cpu_sec =
				slurm_atoul(step_row[STEP_REQ_SYS_SEC]);
			step->sys_cpu_usec =
				slurm_atoul(step_row[STEP_REQ_SYS_USEC]);
			step->tot_cpu_sec +=
				step->user_cpu_sec + step->sys_cpu_sec;
			step->tot_cpu_usec +=
				step->user_cpu_usec + step->sys_cpu_usec;
			step->stats.disk_read_max =
				atof(step_row[STEP_REQ_MAX_DISK_READ]);
			step->stats.disk_read_max_taskid =
				slurm_atoul(step_row[
					STEP_REQ_MAX_DISK_READ_TASK]);
			step->stats.disk_read_ave =
				atof(step_row[STEP_REQ_AVE_DISK_READ]);
			step->stats.disk_write_max =
				atof(step_row[STEP_REQ_MAX_DISK_WRITE]);
			step->stats.disk_write_max_taskid =
				slurm_atoul(step_row[
					STEP_REQ_MAX_DISK_WRITE_TASK]);
			step->stats.disk_write_ave =
				atof(step_row[STEP_REQ_AVE_DISK_WRITE]);
			step->stats.vsize_max =
				slurm_atoul(step_row[STEP_REQ_MAX_VSIZE]);
			step->stats.vsize_max_taskid =
				slurm_atoul(step_row[STEP_REQ_MAX_VSIZE_TASK]);
			step->stats.vsize_ave =
				atof(step_row[STEP_REQ_AVE_VSIZE]);
			step->stats.rss_max =
				slurm_atoul(step_row[STEP_REQ_MAX_RSS]);
			step->stats.rss_max_taskid =
				slurm_atoul(step_row[STEP_REQ_MAX_RSS_TASK]);
			step->stats.rss_ave =
				atof(step_row[STEP_REQ_AVE_RSS]);
			step->stats.pages_max =
				slurm_atoul(step_row[STEP_REQ_MAX_PAGES]);
			step->stats.pages_max_taskid =
				slurm_atoul(step_row[STEP_REQ_MAX_PAGES_TASK]);
			step->stats.pages_ave =
				atof(step_row[STEP_REQ_AVE_PAGES]);
			step->stats.cpu_min =
				slurm_atoul(step_row[STEP_REQ_MIN_CPU]);
			step->stats.cpu_min_taskid =
				slurm_atoul(step_row[STEP_REQ_MIN_CPU_TASK]);
			step->stats.cpu_ave = atof(step_row[STEP_REQ_AVE_CPU]);
			step->stats.act_cpufreq =
				atof(step_row[STEP_REQ_ACT_CPUFREQ]);
			step->stats.consumed_energy =
				atof(step_row[STEP_REQ_CONSUMED_ENERGY]);
			step->req_cpufreq =
				slurm_atoul(step_row[STEP_REQ_REQ_CPUFREQ]);
			step->stepname = xstrdup(step_row[STEP_REQ_NAME]);
			step->nodes = xstrdup(step_row[STEP_REQ_NODELIST]);
			step->stats.vsize_max_nodeid =
				slurm_atoul(step_row[STEP_REQ_MAX_VSIZE_NODE]);
			step->stats.rss_max_nodeid =
				slurm_atoul(step_row[STEP_REQ_MAX_RSS_NODE]);
			step->stats.pages_max_nodeid =
				slurm_atoul(step_row[STEP_REQ_MAX_PAGES_NODE]);
			step->stats.cpu_min_nodeid =
				slurm_atoul(step_row[STEP_REQ_MIN_CPU_NODE]);

			step->requid =
				slurm_atoul(step_row[STEP_REQ_KILL_REQUID]);
		}
		mysql_free_result(step_result);

		if (!job->track_steps) {
			/* If track_steps isn't set, check whether the
			   job has multiple steps.  If there is only
			   one step, compare the job name against the
			   step name; in almost all cases they will
			   differ, and if they do, print the step
			   separately.
			*/
			if (list_count(job->steps) > 1)
				job->track_steps = 1;
			else if (step && step->stepname && job->jobname) {
				if (strcmp(step->stepname, job->jobname))
					job->track_steps = 1;
			}
		}
	skip_steps:
		/* need to reset here to make the above test valid */
		step = NULL;
	}
	mysql_free_result(result);

end_it:
	if (local_cluster_list)
		list_destroy(local_cluster_list);

	if (rc == SLURM_SUCCESS)
		list_transfer(sent_list, job_list);

	if (job_list)
		list_destroy(job_list);
	return rc;
}
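The suspend accounting above clips each suspend interval to the job's [start, end] window before subtracting it from the elapsed time. The same arithmetic in isolation, as a hedged helper (the name is made up):

/* Hypothetical helper: seconds of [s_start, s_end] that fall inside
 * [j_start, j_end]; mirrors the clamping done in the loop above. */
static time_t _suspended_overlap(time_t s_start, time_t s_end,
				 time_t j_start, time_t j_end)
{
	if (s_start < j_start)
		s_start = j_start;
	if (s_end > j_end)
		s_end = j_end;
	return (s_end - s_start > 0) ? (s_end - s_start) : 0;
}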
Ejemplo n.º 30
0
/* Attempt to schedule a specific job on specific available nodes
 * IN job_ptr - job to schedule
 * IN/OUT avail_bitmap - nodes available/selected to use
 * IN min_nodes, max_nodes, req_nodes - node count limits for the job
 * IN exc_core_bitmap - cores which can not be used
 * RET SLURM_SUCCESS on success, otherwise an error code
 */
static int  _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap,
		       uint32_t min_nodes, uint32_t max_nodes,
		       uint32_t req_nodes, bitstr_t *exc_core_bitmap)
{
	bitstr_t *tmp_bitmap;
	int rc = SLURM_SUCCESS;
	int feat_cnt = _num_feature_count(job_ptr);
	List preemptee_candidates = NULL;

	if (feat_cnt) {
		/* Ideally schedule the job feature by feature,
		 * but I don't want to add that complexity here
		 * right now, so clear the feature counts and try
		 * to schedule. This will work if there is only
		 * one feature count. It should work fairly well
		 * in cases where there are multiple feature
		 * counts. */
		struct job_details *detail_ptr = job_ptr->details;
		ListIterator feat_iter;
		struct feature_record *feat_ptr;
		int i = 0, list_size;
		uint16_t *feat_cnt_orig = NULL, high_cnt = 0;

		/* Clear the feature counts */
		list_size = list_count(detail_ptr->feature_list);
		feat_cnt_orig = xmalloc(sizeof(uint16_t) * list_size);
		feat_iter = list_iterator_create(detail_ptr->feature_list);
		if (feat_iter == NULL)
			fatal("list_iterator_create: malloc failure");
		while ((feat_ptr =
			(struct feature_record *) list_next(feat_iter))) {
			high_cnt = MAX(high_cnt, feat_ptr->count);
			feat_cnt_orig[i++] = feat_ptr->count;
			feat_ptr->count = 0;
		}
		list_iterator_destroy(feat_iter);

		if ((job_req_node_filter(job_ptr, *avail_bitmap) !=
		     SLURM_SUCCESS) ||
		    (bit_set_count(*avail_bitmap) < high_cnt)) {
			rc = ESLURM_NODES_BUSY;
		} else {
			preemptee_candidates =
					slurm_find_preemptable_jobs(job_ptr);
			rc = select_g_job_test(job_ptr, *avail_bitmap,
					       high_cnt, max_nodes, req_nodes,
					       SELECT_MODE_WILL_RUN,
					       preemptee_candidates, NULL,
					       exc_core_bitmap);
		}

		/* Restore the feature counts */
		i = 0;
		feat_iter = list_iterator_create(detail_ptr->feature_list);
		while ((feat_ptr =
			(struct feature_record *) list_next(feat_iter))) {
			feat_ptr->count = feat_cnt_orig[i++];
		}
		list_iterator_destroy(feat_iter);
		xfree(feat_cnt_orig);
	} else {
		/* Try to schedule the job. First on dedicated nodes
		 * then on shared nodes (if so configured). */
		uint16_t orig_shared;
		time_t now = time(NULL);
		char str[100];

		preemptee_candidates = slurm_find_preemptable_jobs(job_ptr);
		orig_shared = job_ptr->details->shared;
		job_ptr->details->shared = 0;
		tmp_bitmap = bit_copy(*avail_bitmap);

		if (exc_core_bitmap) {
			bit_fmt(str, (sizeof(str) - 1), exc_core_bitmap);
			debug2(" _try_sched with exclude core bitmap: %s",str);
		}

		rc = select_g_job_test(job_ptr, *avail_bitmap, min_nodes,
				       max_nodes, req_nodes,
				       SELECT_MODE_WILL_RUN,
				       preemptee_candidates, NULL,
				       exc_core_bitmap);

		job_ptr->details->shared = orig_shared;

		if (((rc != SLURM_SUCCESS) || (job_ptr->start_time > now)) &&
		    (orig_shared != 0)) {
			FREE_NULL_BITMAP(*avail_bitmap);
			*avail_bitmap = tmp_bitmap;
			rc = select_g_job_test(job_ptr, *avail_bitmap,
					       min_nodes, max_nodes, req_nodes,
					       SELECT_MODE_WILL_RUN,
					       preemptee_candidates, NULL,
					       exc_core_bitmap);
		} else
			FREE_NULL_BITMAP(tmp_bitmap);
	}

	if (preemptee_candidates)
		list_destroy(preemptee_candidates);
	return rc;
}
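The feature-count branch above follows a save, clear, test, restore pattern around select_g_job_test(). Reduced to a skeleton (a sketch of the idiom, not a SLURM API; xmalloc/xfree as used throughout this code):

/* Hypothetical skeleton of the save/clear/restore idiom used above:
 * stash each count, zero it for the trial run, then put it back. */
static void _trial_with_cleared_counts(uint16_t *counts, int n,
				       void (*trial)(void))
{
	uint16_t *saved = xmalloc(sizeof(uint16_t) * n);
	int i;

	for (i = 0; i < n; i++) {
		saved[i] = counts[i];
		counts[i] = 0;
	}
	trial();		/* e.g. the select_g_job_test() attempt */
	for (i = 0; i < n; i++)
		counts[i] = saved[i];
	xfree(saved);
}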