Esempio n. 1
0
static void _update_block_record(sview_block_info_t *block_ptr,
				 GtkTreeStore *treestore)
{
	char cnode_cnt[20], cnode_cnt2[20];
	char *tmp_char = NULL, *tmp_char2 = NULL, *tmp_char3 = NULL;

	convert_num_unit((float)block_ptr->cnode_cnt, cnode_cnt,
			 sizeof(cnode_cnt), UNIT_NONE, NO_VAL,
			 working_sview_config.convert_flags);
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		convert_num_unit((float)block_ptr->cnode_err_cnt, cnode_cnt2,
				 sizeof(cnode_cnt), UNIT_NONE, NO_VAL,
				 working_sview_config.convert_flags);
		tmp_char3 = xstrdup_printf("%s/%s", cnode_cnt, cnode_cnt2);
	} else
		tmp_char3 = cnode_cnt;

	tmp_char = conn_type_string_full(block_ptr->bg_conn_type);
	tmp_char2 = _set_running_job_str(block_ptr->job_list, 0);
	/* Combining these records provides a slight performance improvement */
	gtk_tree_store_set(treestore, &block_ptr->iter_ptr,
			   SORTID_BLOCK,        block_ptr->bg_block_name,
			   SORTID_COLOR,
				sview_colors[block_ptr->color_inx],
			   SORTID_COLOR_INX,    block_ptr->color_inx,
			   SORTID_CONN,		tmp_char,
			   SORTID_IMAGEMLOADER, block_ptr->imagemloader,
			   SORTID_JOB,          tmp_char2,
			   SORTID_NODE_INX,     block_ptr->mp_inx,
			   SORTID_NODE_CNT,     tmp_char3,
			   SORTID_NODELIST,     block_ptr->mp_str,
			   SORTID_PARTITION,    block_ptr->slurm_part_name,
			   SORTID_REASON,       block_ptr->reason,
			   SORTID_SMALL_BLOCK,  block_ptr->small_block,
			   SORTID_STATE,
				bg_block_state_string(block_ptr->state),
			   SORTID_UPDATED,      1,
			   -1);
	xfree(tmp_char);
	xfree(tmp_char2);
	if (cluster_flags & CLUSTER_FLAG_BGQ)
		xfree(tmp_char3);

	if (cluster_flags & CLUSTER_FLAG_BGP) {
		gtk_tree_store_set(treestore, &block_ptr->iter_ptr,
				   SORTID_IMAGERAMDISK, block_ptr->imageramdisk,
				   SORTID_IMAGELINUX,   block_ptr->imagelinux,
				   -1);
	} else if (cluster_flags & CLUSTER_FLAG_BGL) {
		gtk_tree_store_set(treestore, &block_ptr->iter_ptr,
				   SORTID_IMAGERAMDISK, block_ptr->imageramdisk,
				   SORTID_IMAGELINUX,   block_ptr->imagelinux,
				   SORTID_IMAGEBLRTS,   block_ptr->imageblrts,
				   SORTID_USE,
					node_use_string(block_ptr->bg_node_use),
				   -1);
	}

	return;
}
Esempio n. 2
0
/*
 * _bg_report - download and print current bgblock state information
 */
static int _bg_report(block_info_msg_t *block_ptr)
{
	int i;

	if (!block_ptr) {
		slurm_perror("No block_ptr given");
		return SLURM_ERROR;
	}

	if (!params.no_header)
		printf("BG_BLOCK         MIDPLANES       STATE    CONNECTION USE\n");
/*                      1234567890123456 123456789012 12345678 1234567890 12345+ */
/*                      RMP_22Apr1544018 bg[123x456]  READY    TORUS      COPROCESSOR */

	for (i=0; i<block_ptr->record_count; i++) {
		char *conn_str = conn_type_string_full(
			block_ptr->block_array[i].conn_type);
		printf("%-16.16s %-15.15s %-8.8s %-10.10s %s\n",
		       block_ptr->block_array[i].bg_block_id,
		       block_ptr->block_array[i].mp_str,
		       bg_block_state_string(
			       block_ptr->block_array[i].state),
		       conn_str,
		       node_use_string(
			       block_ptr->block_array[i].node_use));
		xfree(conn_str);
	}

	return SLURM_SUCCESS;
}
static int _print_text_part(partition_info_t *part_ptr,
			    db2_block_info_t *db2_info_ptr)
{
	int printed = 0;
	int tempxcord;
	int prefixlen;
	int i = 0;
	int width = 0;
	char *nodes = NULL, time_buf[20], *conn_str = NULL;
	char tmp_cnt[8];
	char tmp_char[8];

	if (params.cluster_flags & CLUSTER_FLAG_BG)
		convert_num_unit((float)part_ptr->total_nodes, tmp_cnt,
				 sizeof(tmp_cnt), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
	else
		snprintf(tmp_cnt, sizeof(tmp_cnt), "%u", part_ptr->total_nodes);

	if (!params.commandline) {
		mvwprintw(text_win,
			  main_ycord,
			  main_xcord, "%c",
			  part_ptr->flags);
		main_xcord += 4;

		if (part_ptr->name) {
			mvwprintw(text_win,
				  main_ycord,
				  main_xcord, "%.9s",
				  part_ptr->name);
			main_xcord += 10;
			if (params.display != BGPART) {
				char *tmp_state;
				if (part_ptr->state_up == PARTITION_INACTIVE)
					tmp_state = "inact";
				else if (part_ptr->state_up == PARTITION_UP)
					tmp_state = "up";
				else if (part_ptr->state_up == PARTITION_DOWN)
					tmp_state = "down";
				else if (part_ptr->state_up == PARTITION_DRAIN)
					tmp_state = "drain";
				else
					tmp_state = "unk";
				mvwprintw(text_win, main_ycord, main_xcord,
					  tmp_state);
				main_xcord += 7;

				if (part_ptr->max_time == INFINITE)
					snprintf(time_buf, sizeof(time_buf),
						 "infinite");
				else {
					secs2time_str((part_ptr->max_time
						       * 60),
						      time_buf,
						      sizeof(time_buf));
				}

				width = strlen(time_buf);
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord + (9 - width),
					  "%s",
					  time_buf);
				main_xcord += 11;
			}
		} else
			main_xcord += 10;

		if (params.display == BGPART) {
			if (db2_info_ptr) {
				char *job_running = _set_running_job_str(
					db2_info_ptr->job_list, 1);
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "%.16s",
					  db2_info_ptr->bg_block_name);
				main_xcord += 18;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "%.7s",
					  bg_block_state_string(
						  db2_info_ptr->state));
				main_xcord += 8;

				snprintf(tmp_char, sizeof(tmp_char),
					 "%s", job_running);
				xfree(job_running);

				mvwprintw(text_win,
					  main_ycord,
					  main_xcord,
					  "%.8s", tmp_char);
				main_xcord += 8;

				conn_str = conn_type_string_full(
					db2_info_ptr->bg_conn_type);
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "%.7s",
					  conn_str);
				xfree(conn_str);
				main_xcord += 8;

			} else {
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 18;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 8;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 8;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 9;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 7;
				mvwprintw(text_win,
					  main_ycord,
					  main_xcord, "?");
				main_xcord += 10;
			}
		}
		mvwprintw(text_win,
			  main_ycord,
			  main_xcord, "%5s", tmp_cnt);

		main_xcord += 7;

		tempxcord = main_xcord;

		if (params.display == BGPART)
			nodes = part_ptr->allow_groups;
		else
			nodes = part_ptr->nodes;
		i = 0;
		prefixlen = i;
		while (nodes && nodes[i]) {
			width = getmaxx(text_win) - 1 - main_xcord;

			if (!prefixlen && (nodes[i] == '[') &&
			    (nodes[i - 1] == ','))
				prefixlen = i + 1;

			if (nodes[i - 1] == ',' && (width - 12) <= 0) {
				main_ycord++;
				main_xcord = tempxcord + prefixlen;
			} else if (main_xcord >= getmaxx(text_win)) {
				main_ycord++;
				main_xcord = tempxcord + prefixlen;
			}

			if ((printed = mvwaddch(text_win,
						main_ycord,
						main_xcord,
						nodes[i])) < 0)
				return printed;
			main_xcord++;

			i++;
		}
		if ((params.display == BGPART) && db2_info_ptr &&
		    (db2_info_ptr->ionode_str)) {
			mvwprintw(text_win,
				  main_ycord,
				  main_xcord, "[%s]",
				  db2_info_ptr->ionode_str);
		}

		main_xcord = 1;
		main_ycord++;
	} else {
		if (part_ptr->name) {
			printf("%9.9s ", part_ptr->name);

			if (params.display != BGPART) {
				if (part_ptr->state_up == PARTITION_INACTIVE)
					printf(" inact ");
				else if (part_ptr->state_up == PARTITION_UP)
					printf("   up ");
				else if (part_ptr->state_up == PARTITION_DOWN)
					printf(" down ");
				else if (part_ptr->state_up == PARTITION_DRAIN)
					printf(" drain ");
				else
					printf(" unk ");

				if (part_ptr->max_time == INFINITE)
					snprintf(time_buf, sizeof(time_buf),
						 "infinite");
				else {
					secs2time_str((part_ptr->max_time
						       * 60),
						      time_buf,
						      sizeof(time_buf));
				}

				printf("%9.9s ", time_buf);
			}
		}

		if (params.display == BGPART) {
			if (db2_info_ptr) {
				char *job_running = _set_running_job_str(
					db2_info_ptr->job_list, 1);
				printf("%16.16s ",
				       db2_info_ptr->bg_block_name);
				printf("%-7.7s ",
				       bg_block_state_string(
					       db2_info_ptr->state));

				printf("%8.8s ", job_running);
				xfree(job_running);

				conn_str = conn_type_string_full(
					db2_info_ptr->bg_conn_type);
				printf("%8.8s ", conn_str);
				xfree(conn_str);
			}
		}

		printf("%5s ", tmp_cnt);

		if (params.display == BGPART)
			nodes = part_ptr->allow_groups;
		else
			nodes = part_ptr->nodes;

		if ((params.display == BGPART) && db2_info_ptr &&
		    (db2_info_ptr->ionode_str)) {
			printf("%s[%s]\n", nodes, db2_info_ptr->ionode_str);
		} else
			printf("%s\n",nodes);
	}
	return printed;
}
Esempio n. 4
0
/*
 * slurm_sprint_block_info - output information about a specific Bluegene
 *	block based upon message as loaded using slurm_load_block
 * IN block_ptr - an individual partition information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *slurm_sprint_block_info(
	block_info_t * block_ptr, int one_liner)
{
	int j;
	char tmp1[16], tmp2[16], *tmp_char = NULL;
	char *out = NULL;
	char *line_end = "\n   ";
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	if (one_liner)
		line_end = " ";

	/****** Line 1 ******/
	convert_num_unit((float)block_ptr->cnode_cnt, tmp1, sizeof(tmp1),
			 UNIT_NONE, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		convert_num_unit((float)block_ptr->cnode_err_cnt, tmp2,
				 sizeof(tmp2), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		tmp_char = xstrdup_printf("%s/%s", tmp1, tmp2);
	} else
		tmp_char = tmp1;

	out = xstrdup_printf("BlockName=%s TotalNodes=%s State=%s%s",
			     block_ptr->bg_block_id, tmp_char,
			     bg_block_state_string(block_ptr->state),
			     line_end);
	if (cluster_flags & CLUSTER_FLAG_BGQ)
		xfree(tmp_char);
	/****** Line 2 ******/
	j = 0;
	if (block_ptr->job_list)
		j = list_count(block_ptr->job_list);

	if (!j)
		xstrcat(out, "JobRunning=NONE ");
	else if (j == 1) {
		block_job_info_t *block_job = list_peek(block_ptr->job_list);
		xstrfmtcat(out, "JobRunning=%u ", block_job->job_id);
	} else
		xstrcat(out, "JobRunning=Multiple ");

	tmp_char = conn_type_string_full(block_ptr->conn_type);
	xstrfmtcat(out, "ConnType=%s", tmp_char);
	xfree(tmp_char);
	if (cluster_flags & CLUSTER_FLAG_BGL)
		xstrfmtcat(out, " NodeUse=%s",
			   node_use_string(block_ptr->node_use));

	xstrcat(out, line_end);

	/****** Line 3 ******/
	if (block_ptr->ionode_str)
		xstrfmtcat(out, "MidPlanes=%s[%s] MPIndices=",
			   block_ptr->mp_str, block_ptr->ionode_str);
	else
		xstrfmtcat(out, "MidPlanes=%s MPIndices=",
			   block_ptr->mp_str);
	for (j = 0;
	     (block_ptr->mp_inx && (block_ptr->mp_inx[j] != -1));
	     j+=2) {
		if (j > 0)
			xstrcat(out, ",");
		xstrfmtcat(out, "%d-%d", block_ptr->mp_inx[j],
			   block_ptr->mp_inx[j+1]);
	}
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "MloaderImage=%s%s",
		   block_ptr->mloaderimage, line_end);

	if (cluster_flags & CLUSTER_FLAG_BGL) {
		/****** Line 5 ******/
		xstrfmtcat(out, "BlrtsImage=%s%s", block_ptr->blrtsimage,
			   line_end);
		/****** Line 6 ******/
		xstrfmtcat(out, "LinuxImage=%s%s", block_ptr->linuximage,
			   line_end);
		/****** Line 7 ******/
		xstrfmtcat(out, "RamdiskImage=%s", block_ptr->ramdiskimage);
	} else if (cluster_flags & CLUSTER_FLAG_BGP) {
		/****** Line 5 ******/
		xstrfmtcat(out, "CnloadImage=%s%s", block_ptr->linuximage,
			   line_end);
		/****** Line 6 ******/
		xstrfmtcat(out, "IoloadImage=%s", block_ptr->ramdiskimage);
	}

	if (block_ptr->reason)
		xstrfmtcat(out, "Reason=%s%s",
			   block_ptr->reason, line_end);

	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
Esempio n. 5
0
/* Perform job initiation work */
static void _start_agent(bg_action_t *bg_action_ptr)
{
	int rc, set_user_rc = SLURM_SUCCESS;
	bg_record_t *bg_record = NULL;
	bg_record_t *found_record = NULL;
	ListIterator itr;
	List delete_list = NULL;
	int requeue_job = 0;
	uint32_t req_job_id = bg_action_ptr->job_ptr->job_id;
	bool block_inited = 0;
	bool delete_it = 0;

	slurm_mutex_lock(&block_state_mutex);
	bg_record = find_bg_record_in_list(bg_lists->main,
					   bg_action_ptr->bg_block_id);

	if (!bg_record) {
		bg_record->modifying = 0;
		slurm_mutex_unlock(&block_state_mutex);
		error("block %s not found in bg_lists->main",
		      bg_action_ptr->bg_block_id);
		bg_requeue_job(req_job_id, 1, 0, JOB_BOOT_FAIL, false);
		return;
	}

	if ((bg_record->job_running <= NO_JOB_RUNNING)
	    && !find_job_in_bg_record(bg_record, req_job_id)) {
		bg_record->modifying = 0;
		// bg_reset_block(bg_record); should already happened
		slurm_mutex_unlock(&block_state_mutex);
		debug("job %u finished during the queueing job "
		      "(everything is ok)",
		      req_job_id);
		return;
	}

	if ((bg_record->state == BG_BLOCK_TERM) || bg_record->free_cnt) {
		/* It doesn't appear state of a small block
		   (conn_type) is held on a BGP system so
		   if we to reset it so, just set the reboot flag and
		   handle it later in that code. */
		bg_action_ptr->reboot = 1;
	}

	delete_list = list_create(NULL);
	itr = list_iterator_create(bg_lists->main);
	while ((found_record = list_next(itr))) {
		if (bg_record == found_record)
			continue;

		if (!blocks_overlap(bg_record, found_record)) {
			debug2("block %s isn't part of %s",
			       found_record->bg_block_id,
			       bg_record->bg_block_id);
			continue;
		}

		if (found_record->job_ptr
		    || (found_record->job_list
			&& list_count(found_record->job_list))) {
			struct job_record *job_ptr = found_record->job_ptr;
			if (!found_record->job_ptr)
				job_ptr = find_job_in_bg_record(
					found_record, NO_VAL);
			error("Trying to start job %u on block %s, "
			      "but there is a job %u running on an overlapping "
			      "block %s it will not end until %ld.  "
			      "This should never happen.",
			      req_job_id,
			      bg_record->bg_block_id,
			      job_ptr->job_id,
			      found_record->bg_block_id,
			      job_ptr->end_time);
			requeue_job = 1;
			break;
		}

		debug2("need to make sure %s is free, it's part of %s",
		       found_record->bg_block_id,
		       bg_record->bg_block_id);
		list_push(delete_list, found_record);
	}
	list_iterator_destroy(itr);

	if (requeue_job) {
		FREE_NULL_LIST(delete_list);

		bg_reset_block(bg_record, bg_action_ptr->job_ptr);

		bg_record->modifying = 0;
		slurm_mutex_unlock(&block_state_mutex);
		bg_requeue_job(req_job_id, 0, 0, JOB_BOOT_FAIL, false);
		return;
	}

	slurm_mutex_unlock(&block_state_mutex);

	if (bg_conf->layout_mode == LAYOUT_DYNAMIC)
		delete_it = 1;
	free_block_list(req_job_id, delete_list, delete_it, 1);
	FREE_NULL_LIST(delete_list);

	while (1) {
		slurm_mutex_lock(&block_state_mutex);
		/* Failure will unlock block_state_mutex so no need to
		   unlock before return.  No need to reset modifying
		   here if the block doesn't exist.
		*/
		if (!_make_sure_block_still_exists(bg_action_ptr, bg_record)) {
			error("Problem with deallocating blocks to run job %u "
			      "on block %s", req_job_id,
			      bg_action_ptr->bg_block_id);
			return;
		}
		/* If another thread is freeing this block we need to
		   wait until it is done or we will get into a state
		   where this job will be killed.
		*/
		if (!bg_record->free_cnt)
			break;
		debug("Waiting for block %s to free for job %u.  "
		      "%d thread(s) trying to free it",
		      bg_record->bg_block_id, req_job_id,
		      bg_record->free_cnt);
		slurm_mutex_unlock(&block_state_mutex);
		sleep(1);
	}
	/* This was set in the start_job function to close the above
	   window where a job could be mistakenly requeued if another
	   thread is trying to free this block as we are trying to run
	   on it, which is fine since we will reboot it later.
	*/
	bg_record->modifying = 0;

	if ((bg_record->job_running <= NO_JOB_RUNNING)
	    && !find_job_in_bg_record(bg_record, req_job_id)) {
		// bg_reset_block(bg_record); should already happened
		slurm_mutex_unlock(&block_state_mutex);
		debug("job %u already finished before boot",
		      req_job_id);
		return;
	}

	if (bg_record->job_list
	    && (bg_action_ptr->job_ptr->total_cpus != bg_record->cpu_cnt)
	    && (list_count(bg_record->job_list) != 1)) {
		/* We don't allow modification of a block or reboot of
		   a block if we are running multiple jobs on the
		   block.
		*/
		debug2("no reboot");
		goto no_reboot;
	}

	rc = 0;
#ifdef HAVE_BGL
	if (bg_action_ptr->blrtsimage
	   && xstrcasecmp(bg_action_ptr->blrtsimage, bg_record->blrtsimage)) {
		debug3("changing BlrtsImage from %s to %s",
		       bg_record->blrtsimage, bg_action_ptr->blrtsimage);
		xfree(bg_record->blrtsimage);
		bg_record->blrtsimage = xstrdup(bg_action_ptr->blrtsimage);
		rc = 1;
	}
#elif defined HAVE_BGP
	if ((bg_action_ptr->conn_type[0] >= SELECT_SMALL)
	   && (bg_action_ptr->conn_type[0] != bg_record->conn_type[0])) {
		if (bg_conf->slurm_debug_level >= LOG_LEVEL_DEBUG3) {
			char *req_conn_type =
				conn_type_string_full(bg_action_ptr->conn_type);
			char *conn_type =
				conn_type_string_full(bg_record->conn_type);
			debug3("changing small block mode from %s to %s",
			       conn_type, req_conn_type);
			xfree(req_conn_type);
			xfree(conn_type);
		}
		rc = 1;
# ifndef HAVE_BG_FILES
		/* since we don't check state on an emulated system we
		 * have to change it here
		 */
		bg_record->conn_type[0] = bg_action_ptr->conn_type[0];
# endif
	}
#endif

#ifdef HAVE_BG_L_P
	if (bg_action_ptr->linuximage
	   && xstrcasecmp(bg_action_ptr->linuximage, bg_record->linuximage)) {
# ifdef HAVE_BGL
		debug3("changing LinuxImage from %s to %s",
		       bg_record->linuximage, bg_action_ptr->linuximage);
# else
		debug3("changing CnloadImage from %s to %s",
		       bg_record->linuximage, bg_action_ptr->linuximage);
# endif
		xfree(bg_record->linuximage);
		bg_record->linuximage = xstrdup(bg_action_ptr->linuximage);
		rc = 1;
	}
	if (bg_action_ptr->ramdiskimage
	   && xstrcasecmp(bg_action_ptr->ramdiskimage,
			 bg_record->ramdiskimage)) {
# ifdef HAVE_BGL
		debug3("changing RamDiskImage from %s to %s",
		       bg_record->ramdiskimage, bg_action_ptr->ramdiskimage);
# else
		debug3("changing IoloadImage from %s to %s",
		       bg_record->ramdiskimage, bg_action_ptr->ramdiskimage);
# endif
		xfree(bg_record->ramdiskimage);
		bg_record->ramdiskimage = xstrdup(bg_action_ptr->ramdiskimage);
		rc = 1;
	}
#endif
	if (bg_action_ptr->mloaderimage
	   && xstrcasecmp(bg_action_ptr->mloaderimage,
			 bg_record->mloaderimage)) {
		debug3("changing MloaderImage from %s to %s",
		       bg_record->mloaderimage, bg_action_ptr->mloaderimage);
		xfree(bg_record->mloaderimage);
		bg_record->mloaderimage = xstrdup(bg_action_ptr->mloaderimage);
		rc = 1;
	}

	if (rc || bg_action_ptr->reboot) {
		bg_record->modifying = 1;

		/* Increment free_cnt to make sure we don't loose this
		 * block since bg_free_block will unlock block_state_mutex.
		 */
		bg_record->free_cnt++;
		bg_free_block(bg_record, 1, 1);
		bg_record->free_cnt--;

#if defined HAVE_BG_FILES && defined HAVE_BG_L_P
#ifdef HAVE_BGL
		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_BlrtsImg,
					      bg_record->blrtsimage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_BlrtsImg): %s",
			      bg_err_str(rc));

		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_LinuxImg,
					      bg_record->linuximage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_LinuxImg): %s",
			      bg_err_str(rc));

		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_RamdiskImg,
					      bg_record->ramdiskimage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_RamdiskImg): %s",
			      bg_err_str(rc));

#elif defined HAVE_BGP
		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_CnloadImg,
					      bg_record->linuximage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_CnloadImg): %s",
			      bg_err_str(rc));

		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_IoloadImg,
					      bg_record->ramdiskimage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_IoloadImg): %s",
			      bg_err_str(rc));

		if (bg_action_ptr->conn_type[0] > SELECT_SMALL) {
			char *conn_type = NULL;
			switch(bg_action_ptr->conn_type[0]) {
			case SELECT_HTC_S:
				conn_type = "s";
				break;
			case SELECT_HTC_D:
				conn_type = "d";
				break;
			case SELECT_HTC_V:
				conn_type = "v";
				break;
			case SELECT_HTC_L:
				conn_type = "l";
				break;
			default:
				break;
			}
			/* the option has to be set before the pool can be
			   set */
			if ((rc = bridge_block_modify(
				     bg_record->bg_block_id,
				     RM_MODIFY_Options,
				     conn_type)) != SLURM_SUCCESS)
				error("bridge_set_data(RM_MODIFY_Options): %s",
				      bg_err_str(rc));
		}
#endif
		if ((rc = bridge_block_modify(bg_record->bg_block_id,
					      RM_MODIFY_MloaderImg,
					      bg_record->mloaderimage))
		    != SLURM_SUCCESS)
			error("bridge_block_modify(RM_MODIFY_MloaderImg): %s",
			      bg_err_str(rc));

#endif
		bg_record->modifying = 0;
	}

no_reboot:
	if (bg_record->state == BG_BLOCK_FREE) {
		if ((rc = bridge_block_boot(bg_record)) != SLURM_SUCCESS) {
			char reason[200];

			bg_record->boot_state = 0;
			bg_record->boot_count = 0;

			if (rc == BG_ERROR_INVALID_STATE)
				snprintf(reason, sizeof(reason),
					 "Block %s is in an incompatible "
					 "state.  This usually means "
					 "hardware is allocated "
					 "by another block (maybe outside "
					 "of SLURM).",
					 bg_record->bg_block_id);
			else
				snprintf(reason, sizeof(reason),
					 "Couldn't boot block %s: %s",
					 bg_record->bg_block_id,
					 bg_err_str(rc));
			slurm_mutex_unlock(&block_state_mutex);
			requeue_and_error(bg_record, reason);
			return;
		}
	} else if (bg_record->state == BG_BLOCK_BOOTING) {
#ifdef HAVE_BG_FILES
		bg_record->boot_state = 1;
#else
		if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
			list_push(bg_lists->booted, bg_record);
		bg_record->state = BG_BLOCK_INITED;
		last_bg_update = time(NULL);
#endif
	}


	if ((bg_record->job_running <= NO_JOB_RUNNING)
	    && !find_job_in_bg_record(bg_record, req_job_id)) {
		slurm_mutex_unlock(&block_state_mutex);
		debug("job %u finished during the start of the boot "
		      "(everything is ok)",
		      req_job_id);
		return;
	}

	/* Don't reset boot_count, it will be reset when state
	   changes, and needs to outlast a job allocation.
	*/
	/* bg_record->boot_count = 0; */
	if (bg_record->state == BG_BLOCK_INITED) {
		debug("block %s is already ready.", bg_record->bg_block_id);
		/* Just in case reset the boot flags */
		bg_record->boot_state = 0;
		bg_record->boot_count = 0;
		set_user_rc = bridge_block_sync_users(bg_record);
		block_inited = 1;
	}
	slurm_mutex_unlock(&block_state_mutex);

	/* This lock needs to happen after the block_state_mutex to
	   avoid deadlock.
	*/
	if (block_inited && bg_action_ptr->job_ptr) {
		slurmctld_lock_t job_write_lock = {
			NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
		lock_slurmctld(job_write_lock);
		bg_action_ptr->job_ptr->job_state &= (~JOB_CONFIGURING);
		last_job_update = time(NULL);
		unlock_slurmctld(job_write_lock);
	}

	if (set_user_rc == SLURM_ERROR) {
		sleep(2);
		/* wait for the slurmd to begin
		   the batch script, slurm_fail_job()
		   is a no-op if issued prior
		   to the script initiation do clean up just
		   in case the fail job isn't ran */
		(void) slurm_fail_job(req_job_id, JOB_BOOT_FAIL);
	}
}
Esempio n. 6
0
/* write select job info to a string
 * IN jobinfo - a select job credential
 * IN mode    - print mode, see enum select_print_mode
 * RET        - char * containing string of request
 */
extern char *xstrdup_select_jobinfo(select_jobinfo_t *jobinfo, int mode)
{
	char *geo = NULL;
	int i;
	char *tmp_image = "default";
	char *buf = NULL;
	char *header = "CONNECT REBOOT ROTATE GEOMETRY BLOCK_ID";
	bool print_x = 1;
	char *conn_type = NULL;

	if ((mode != SELECT_PRINT_DATA)
	    && jobinfo && (jobinfo->magic != JOBINFO_MAGIC)) {
		error("xstrdup_jobinfo: jobinfo magic bad");
		return NULL;
	}

	if (jobinfo == NULL) {
		if (mode != SELECT_PRINT_HEAD) {
			error("xstrdup_jobinfo: jobinfo bad");
			return NULL;
		}
		xstrcat(buf, header);
		return buf;
	}

	if (mode == SELECT_PRINT_GEOMETRY)
		print_x = 0;

	if (jobinfo->geometry[0] == (uint16_t) NO_VAL) {
		for (i=0; i<SYSTEM_DIMENSIONS; i++) {
			if (geo && print_x)
				xstrcat(geo, "x0");
			else
				xstrcat(geo, "0");
		}
	} else if (mode != SELECT_PRINT_START_LOC) {
		geo = give_geo(jobinfo->geometry, jobinfo->dim_cnt, print_x);
		conn_type = conn_type_string_full(jobinfo->conn_type);
	}
	switch (mode) {
	case SELECT_PRINT_HEAD:
		xstrcat(buf, header);
		break;
	case SELECT_PRINT_DATA:
		xstrfmtcat(buf,
			   "%7.7s %6.6s %6.6s    %s %-16s",
			   conn_type,
			   _yes_no_string(jobinfo->reboot),
			   _yes_no_string(jobinfo->rotate),
			   geo,
			   jobinfo->bg_block_id);
		break;
	case SELECT_PRINT_MIXED:
		xstrfmtcat(buf,
			   "Connection=%s Reboot=%s Rotate=%s "
			   "Geometry=%s Block_ID=%s",
			   conn_type,
			   _yes_no_string(jobinfo->reboot),
			   _yes_no_string(jobinfo->rotate),
			   geo,
			   jobinfo->bg_block_id);
		break;
	case SELECT_PRINT_BG_ID:
		xstrfmtcat(buf, "%s", jobinfo->bg_block_id);
		break;
	case SELECT_PRINT_NODES:
		if (jobinfo->ionode_str && jobinfo->ionode_str[0])
			xstrfmtcat(buf, "%s[%s]",
				   jobinfo->mp_str, jobinfo->ionode_str);
		else
			xstrfmtcat(buf, "%s", jobinfo->mp_str);
		break;
	case SELECT_PRINT_CONNECTION:
		xstrfmtcat(buf, "%s", conn_type);
		break;
	case SELECT_PRINT_REBOOT:
		xstrfmtcat(buf, "%s",
			   _yes_no_string(jobinfo->reboot));
		break;
	case SELECT_PRINT_ROTATE:
		xstrfmtcat(buf, "%s",
			   _yes_no_string(jobinfo->rotate));
		break;
	case SELECT_PRINT_GEOMETRY:
		xstrfmtcat(buf, "%s", geo);
		break;
	case SELECT_PRINT_BLRTS_IMAGE:
		if (jobinfo->blrtsimage)
			tmp_image = jobinfo->blrtsimage;
		xstrfmtcat(buf, "%s", tmp_image);
		break;
	case SELECT_PRINT_LINUX_IMAGE:
		if (jobinfo->linuximage)
			tmp_image = jobinfo->linuximage;
		xstrfmtcat(buf, "%s", tmp_image);
		break;
	case SELECT_PRINT_MLOADER_IMAGE:
		if (jobinfo->mloaderimage)
			tmp_image = jobinfo->mloaderimage;
		xstrfmtcat(buf, "%s", tmp_image);
		break;
	case SELECT_PRINT_RAMDISK_IMAGE:
		if (jobinfo->ramdiskimage)
			tmp_image = jobinfo->ramdiskimage;
		xstrfmtcat(buf, "%s", tmp_image);
		break;
	case SELECT_PRINT_START_LOC:
		xfree(geo);
		geo = give_geo(jobinfo->start_loc, jobinfo->dim_cnt, 0);
		xstrfmtcat(buf, "%s", geo);
		break;
	default:
		error("xstrdup_jobinfo: bad mode %d", mode);
	}
	xfree(geo);
	xfree(conn_type);
	return buf;
}
Esempio n. 7
0
static void _print_text_command(allocated_block_t *allocated_block)
{
	char *tmp_char = NULL;

	wattron(text_win,
		COLOR_PAIR(allocated_block->color));

	mvwprintw(text_win, main_ycord,
		  main_xcord, "%c", allocated_block->letter);
	main_xcord += 4;

	tmp_char = conn_type_string_full(allocated_block->request->conn_type);
	mvwprintw(text_win, main_ycord, main_xcord, tmp_char);
	xfree(tmp_char);
	main_xcord += 8;

	if (allocated_block->request->rotate)
		mvwprintw(text_win, main_ycord,
			  main_xcord, "Y");
	else
		mvwprintw(text_win, main_ycord,
			  main_xcord, "N");
	main_xcord += 7;

	if (allocated_block->request->elongate)
		mvwprintw(text_win, main_ycord,
			  main_xcord, "Y");
	else
		mvwprintw(text_win, main_ycord,
			  main_xcord, "N");
	main_xcord += 7;

	mvwprintw(text_win, main_ycord,
		  main_xcord, "%d", allocated_block->request->size);
	main_xcord += 10;

	if (allocated_block->request->conn_type[0] >= SELECT_SMALL) {
#ifdef HAVE_BGP
		mvwprintw(text_win, main_ycord,
			  main_xcord, "%d",
			  allocated_block->request->small16);
		main_xcord += 5;
#endif

		mvwprintw(text_win, main_ycord,
			  main_xcord, "%d",
			  allocated_block->request->small32);
		main_xcord += 5;

#ifndef HAVE_BGL
		mvwprintw(text_win, main_ycord,
			  main_xcord, "%d",
			  allocated_block->request->small64);
		main_xcord += 5;
#endif

		mvwprintw(text_win, main_ycord,
			  main_xcord, "%d",
			  allocated_block->request->small128);
		main_xcord += 6;

#ifndef HAVE_BGL
		mvwprintw(text_win, main_ycord,
			  main_xcord, "%d",
			  allocated_block->request->small256);
		main_xcord += 6;
#endif
	} else {
#ifdef HAVE_BGL
		main_xcord += 11;
#elif defined HAVE_BGP
		main_xcord += 27;
#else
		main_xcord += 22;
#endif
	}
	mvwprintw(text_win, main_ycord,
		  main_xcord, "%s",
		  allocated_block->request->save_name);
	main_xcord = 1;
	main_ycord++;
	wattroff(text_win,
		 COLOR_PAIR(allocated_block->color));
	return;
}
Esempio n. 8
0
static void _layout_block_record(GtkTreeView *treeview,
				 sview_block_info_t *block_ptr,
				 int update)
{
	char tmp_cnt[18], tmp_cnt2[18];
	char *tmp_char = NULL;
	GtkTreeIter iter;
	GtkTreeStore *treestore =
		GTK_TREE_STORE(gtk_tree_view_get_model(treeview));

	add_display_treestore_line(update, treestore, &iter,
				   find_col_name(display_data_block,
						 SORTID_NODELIST),
				   block_ptr->mp_str);
	tmp_char = conn_type_string_full(block_ptr->bg_conn_type);
	add_display_treestore_line(update, treestore, &iter,
				   find_col_name(display_data_block,
						 SORTID_CONN),
				   tmp_char);
	xfree(tmp_char);

	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_IMAGEMLOADER),
					   block_ptr->imagemloader);
	} else if (cluster_flags & CLUSTER_FLAG_BGP) {
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_IMAGELINUX),
					   block_ptr->imagelinux);
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_IMAGERAMDISK),
					   block_ptr->imageramdisk);
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_IMAGEMLOADER),
					   block_ptr->imagemloader);
	} else if (cluster_flags & CLUSTER_FLAG_BGL) {
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_IMAGEBLRTS),
					   block_ptr->imageblrts);
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_IMAGELINUX),
					   block_ptr->imagelinux);
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_IMAGEMLOADER),
					   block_ptr->imagemloader);
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_IMAGERAMDISK),
					   block_ptr->imageramdisk);
	}

	tmp_char = _set_running_job_str(block_ptr->job_list, 0);

	add_display_treestore_line(update, treestore, &iter,
				   find_col_name(display_data_block,
						 SORTID_JOB),
				   tmp_char);
	xfree(tmp_char);
	if (cluster_flags & CLUSTER_FLAG_BGL) {
		add_display_treestore_line(update, treestore, &iter,
					   find_col_name(display_data_block,
							 SORTID_USE),
					   node_use_string(
						   block_ptr->bg_node_use));
	} convert_num_unit((float)block_ptr->cnode_cnt, tmp_cnt,
			   sizeof(tmp_cnt), UNIT_NONE, NO_VAL,
			   working_sview_config.convert_flags);
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		convert_num_unit((float)block_ptr->cnode_err_cnt, tmp_cnt2,
				 sizeof(tmp_cnt2), UNIT_NONE, NO_VAL,
				 working_sview_config.convert_flags);
		tmp_char = xstrdup_printf("%s/%s", tmp_cnt, tmp_cnt2);
	} else
		tmp_char = tmp_cnt;
	add_display_treestore_line(update, treestore, &iter,
				   find_col_name(display_data_block,
						 SORTID_NODE_CNT),
				   tmp_char);
	if (cluster_flags & CLUSTER_FLAG_BGQ)
		xfree(tmp_char);
	add_display_treestore_line(update, treestore, &iter,
				   find_col_name(display_data_block,
						 SORTID_PARTITION),
				   block_ptr->slurm_part_name);
	add_display_treestore_line(update, treestore, &iter,
				   find_col_name(display_data_block,
						 SORTID_STATE),
				   bg_block_state_string(block_ptr->state));
	add_display_treestore_line(update, treestore, &iter,
				   find_col_name(display_data_block,
						 SORTID_REASON),
				   block_ptr->reason);
}
Esempio n. 9
0
/*
 * slurm_sprint_block_info - output information about a specific Bluegene
 *	block based upon message as loaded using slurm_load_block
 * IN block_ptr - an individual partition information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *slurm_sprint_block_info(
	block_info_t * block_ptr, int one_liner)
{
	int j;
	char tmp1[16], *tmp_char = NULL;
	char *out = NULL;
	char *line_end = "\n   ";
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	if (one_liner)
		line_end = " ";

	/****** Line 1 ******/
	convert_num_unit((float)block_ptr->cnode_cnt, tmp1, sizeof(tmp1),
			 UNIT_NONE);

	out = xstrdup_printf("BlockName=%s TotalNodes=%s State=%s%s",
			     block_ptr->bg_block_id, tmp1,
			     bg_block_state_string(block_ptr->state),
			     line_end);

	/****** Line 2 ******/
	if (block_ptr->job_running > NO_JOB_RUNNING)
		xstrfmtcat(out, "JobRunning=%u ", block_ptr->job_running);
	else
		xstrcat(out, "JobRunning=NONE ");
	tmp_char = conn_type_string_full(block_ptr->conn_type);
	xstrfmtcat(out, "User=%s ConnType=%s",
		   block_ptr->owner_name, tmp_char);
	xfree(tmp_char);
	if(cluster_flags & CLUSTER_FLAG_BGL)
		xstrfmtcat(out, " NodeUse=%s",
			   node_use_string(block_ptr->node_use));

	xstrcat(out, line_end);

	/****** Line 3 ******/
	if(block_ptr->ionode_str)
		xstrfmtcat(out, "MidPlanes=%s[%s] MPIndices=",
			   block_ptr->mp_str, block_ptr->ionode_str);
	else
		xstrfmtcat(out, "MidPlanes=%s MPIndices=",
			   block_ptr->mp_str);
	for (j = 0;
	     (block_ptr->mp_inx && (block_ptr->mp_inx[j] != -1));
	     j+=2) {
		if (j > 0)
			xstrcat(out, ",");
		xstrfmtcat(out, "%d-%d", block_ptr->mp_inx[j],
			   block_ptr->mp_inx[j+1]);
	}
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "MloaderImage=%s%s",
		   block_ptr->mloaderimage, line_end);

	if (cluster_flags & CLUSTER_FLAG_BGL) {
		/****** Line 5 ******/
		xstrfmtcat(out, "BlrtsImage=%s%s", block_ptr->blrtsimage,
			   line_end);
		/****** Line 6 ******/
		xstrfmtcat(out, "LinuxImage=%s%s", block_ptr->linuximage,
			   line_end);
		/****** Line 7 ******/
		xstrfmtcat(out, "RamdiskImage=%s", block_ptr->ramdiskimage);
	} else if (cluster_flags & CLUSTER_FLAG_BGP) {
		/****** Line 5 ******/
		xstrfmtcat(out, "CnloadImage=%s%s", block_ptr->linuximage,
			   line_end);
		/****** Line 6 ******/
		xstrfmtcat(out, "IoloadImage=%s", block_ptr->ramdiskimage);
	}

	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}