Example #1
0
extern int bg_status_update_block_state(bg_record_t *bg_record,
					uint16_t state,
					List kill_job_list)
{
	bool skipped_dealloc = false;
	kill_job_struct_t *freeit = NULL;
	int updated = 0;
	uint16_t real_state = bg_record->state & (~BG_BLOCK_ERROR_FLAG);

	if (real_state == state)
		return 0;

	debug("state of Block %s was %s and now is %s",
	      bg_record->bg_block_id,
	      bg_block_state_string(bg_record->state),
	      bg_block_state_string(state));

	/*
	  check to make sure block went
	  through freeing correctly
	*/
	if ((real_state != BG_BLOCK_TERM
	     && !(bg_record->state & BG_BLOCK_ERROR_FLAG))
	    && state == BG_BLOCK_FREE)
		skipped_dealloc = 1;
	else if ((real_state == BG_BLOCK_INITED)
		 && (state == BG_BLOCK_BOOTING)) {
		/* This means the user did a reboot through
		   mpirun but we missed the state
		   change */
		debug("Block %s skipped rebooting, "
		      "but it really is.",
		      bg_record->bg_block_id);
	} else if ((real_state == BG_BLOCK_TERM)
		   && (state == BG_BLOCK_BOOTING))
		/* This is a funky state IBM says
		   isn't a bug, but all their
		   documentation says this doesn't
		   happen, but IBM says oh yeah, you
		   weren't really suppose to notice
		   that. So we will just skip this
		   state and act like this didn't happen. */
		goto nochange_state;
	real_state = state;
	if (bg_record->state & BG_BLOCK_ERROR_FLAG)
		state |= BG_BLOCK_ERROR_FLAG;

	bg_record->state = state;

	if (real_state == BG_BLOCK_TERM || skipped_dealloc)
		_block_is_deallocating(bg_record, kill_job_list);
	else if (real_state == BG_BLOCK_BOOTING) {
		debug("Setting bootflag for %s", bg_record->bg_block_id);
		bg_record->boot_state = 1;
	} else if (real_state == BG_BLOCK_FREE) {
		/* Make sure block is cleaned up.  If there are
		 * running jobs on the block this happens when they
		 * are cleaned off. */
		if (bg_record->job_running == NO_JOB_RUNNING
		    && (!bg_record->job_list
			|| !list_count(bg_record->job_list)))
			bg_reset_block(bg_record, NULL);
		remove_from_bg_list(bg_lists->booted, bg_record);
	} else if (real_state & BG_BLOCK_ERROR_FLAG) {
		if (bg_record->boot_state)
			error("Block %s in an error state while booting.",
			      bg_record->bg_block_id);
		else
			error("Block %s in an error state.",
			      bg_record->bg_block_id);
		remove_from_bg_list(bg_lists->booted, bg_record);
		trigger_block_error();
	} else if (real_state == BG_BLOCK_INITED) {
		if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
			list_push(bg_lists->booted, bg_record);
	}
	updated = 1;
	last_bg_update = time(NULL);
nochange_state:

	/* check the boot state */
	debug3("boot state for block %s is %d",
	       bg_record->bg_block_id, bg_record->boot_state);
	if (bg_record->boot_state) {
		if (bg_record->state & BG_BLOCK_ERROR_FLAG) {
			/* If we get an error on boot that
			 * means it is a transparent L3 error
			 * and should be trying to fix
			 * itself.  If this is the case we
			 * just hang out waiting for the state
			 * to go to free where we will try to
			 * boot again below.
			 */
			return updated;
		}

		switch (real_state) {
		case BG_BLOCK_BOOTING:
			if (bg_record->job_ptr
			    && !IS_JOB_CONFIGURING(bg_record->job_ptr)) {
				debug3("Setting job %u on block %s "
				       "to configuring",
				       bg_record->job_ptr->job_id,
				       bg_record->bg_block_id);
				bg_record->job_ptr->job_state |=
					JOB_CONFIGURING;
				last_job_update = time(NULL);
			} else if (bg_record->job_list
				   && list_count(bg_record->job_list)) {
				struct job_record *job_ptr;
				ListIterator job_itr =
					list_iterator_create(
						bg_record->job_list);
				while ((job_ptr = list_next(job_itr))) {
					if (job_ptr->magic != JOB_MAGIC) {
						error("bg_status_update_"
						      "block_state: 1 "
						      "bad magic found when "
						      "looking at block %s",
						      bg_record->bg_block_id);
						list_delete_item(job_itr);
						continue;
					}
					job_ptr->job_state |= JOB_CONFIGURING;
				}
				list_iterator_destroy(job_itr);
				last_job_update = time(NULL);
			}
			break;
		case BG_BLOCK_FREE:
			if (bg_record->boot_count < RETRY_BOOT_COUNT) {
				bridge_block_boot(bg_record);

				if (bg_record->magic == BLOCK_MAGIC) {
					debug("boot count for block %s is %d",
					      bg_record->bg_block_id,
					      bg_record->boot_count);
					bg_record->boot_count++;
				}
			} else {
				char *reason = (char *)
					"status_check: Boot fails ";

				error("Couldn't boot Block %s",
				      bg_record->bg_block_id);

				/* We can't push on the kill_job_list
				   here since we have to put this
				   block in an error and that means
				   the killing has to take place
				   before the erroring of the block.
				*/
				slurm_mutex_unlock(&block_state_mutex);
				unlock_slurmctld(job_read_lock);
				requeue_and_error(bg_record, reason);
				lock_slurmctld(job_read_lock);
				slurm_mutex_lock(&block_state_mutex);

				bg_record->boot_state = 0;
				bg_record->boot_count = 0;

				remove_from_bg_list(bg_lists->booted,
						    bg_record);
			}
			break;
		case BG_BLOCK_INITED:
			debug("block %s is ready.",
			      bg_record->bg_block_id);
			if (bg_record->job_ptr
			    && IS_JOB_CONFIGURING(bg_record->job_ptr)) {
				bg_record->job_ptr->job_state &=
					(~JOB_CONFIGURING);
				last_job_update = time(NULL);
			} else if (bg_record->job_list
				   && list_count(bg_record->job_list)) {
				struct job_record *job_ptr;
				ListIterator job_itr =
					list_iterator_create(
						bg_record->job_list);
				while ((job_ptr = list_next(job_itr))) {
					if (job_ptr->magic != JOB_MAGIC) {
						error("bg_status_update_"
						      "block_state: 2 "
						      "bad magic found when "
						      "looking at block %s",
						      bg_record->bg_block_id);
						list_delete_item(job_itr);
						continue;
					}
					job_ptr->job_state &=
						(~JOB_CONFIGURING);
				}
				list_iterator_destroy(job_itr);
				last_job_update = time(NULL);
			}

			bg_record->boot_state = 0;
			bg_record->boot_count = 0;

			if (kill_job_list &&
			    bridge_block_sync_users(bg_record)
			    == SLURM_ERROR) {
				freeit = (kill_job_struct_t *)
					xmalloc(sizeof(kill_job_struct_t));
				freeit->jobid = bg_record->job_running;
				list_push(kill_job_list, freeit);
			}
			break;
		case BG_BLOCK_TERM:
			debug2("Block %s is in a deallocating state "
			       "during a boot.  Doing nothing until "
			       "free state.",
			       bg_record->bg_block_id);
			break;
		case BG_BLOCK_REBOOTING:
			debug2("Block %s is rebooting.",
			       bg_record->bg_block_id);
			break;
		default:
			debug("Hey the state of block "
			      "%s is %d(%s) doing nothing.",
			      bg_record->bg_block_id,
			      real_state,
			      bg_block_state_string(bg_record->state));
			break;
		}
	}

	return updated;
}
Example #2
0
extern int bg_status_update_block_state(bg_record_t *bg_record,
					uint16_t state,
					List kill_job_list)
{
	bool skipped_dealloc = false;
	kill_job_struct_t *freeit = NULL;
	int updated = 0;
	uint16_t real_state = bg_record->state & (~BG_BLOCK_ERROR_FLAG);

	if (real_state == state)
		return 0;

	debug("state of Block %s was %s and now is %s",
	      bg_record->bg_block_id,
	      bg_block_state_string(bg_record->state),
	      bg_block_state_string(state));

	/*
	  check to make sure block went
	  through freeing correctly
	*/
	if ((real_state != BG_BLOCK_TERM
	     && !(bg_record->state & BG_BLOCK_ERROR_FLAG))
	    && state == BG_BLOCK_FREE)
		skipped_dealloc = 1;
	else if ((real_state == BG_BLOCK_INITED)
		 && (state == BG_BLOCK_BOOTING)) {
		/* This means the user did a reboot through
		   mpirun but we missed the state
		   change */
		debug("Block %s skipped rebooting, "
		      "but it really is.  "
		      "Setting target_name back to %s",
		      bg_record->bg_block_id,
		      bg_record->user_name);
		xfree(bg_record->target_name);
		bg_record->target_name = xstrdup(bg_record->user_name);
	} else if ((real_state == BG_BLOCK_TERM)
		   && (state == BG_BLOCK_BOOTING))
		/* This is a funky state IBM says
		   isn't a bug, but all their
		   documentation says this doesn't
		   happen, but IBM says oh yeah, you
		   weren't really suppose to notice
		   that. So we will just skip this
		   state and act like this didn't happen. */
		goto nochange_state;
	real_state = state;
	if (bg_record->state & BG_BLOCK_ERROR_FLAG)
		state |= BG_BLOCK_ERROR_FLAG;

	bg_record->state = state;

	if (real_state == BG_BLOCK_TERM || skipped_dealloc)
		_block_is_deallocating(bg_record, kill_job_list);
	else if (real_state == BG_BLOCK_BOOTING) {
		debug("Setting bootflag for %s", bg_record->bg_block_id);
		bg_record->boot_state = 1;
	} else if (real_state == BG_BLOCK_FREE) {
		if (remove_from_bg_list(bg_lists->job_running, bg_record)
		    == SLURM_SUCCESS)
			num_unused_cpus += bg_record->cpu_cnt;
		remove_from_bg_list(bg_lists->booted,
				    bg_record);
	} else if (real_state & BG_BLOCK_ERROR_FLAG) {
		if (bg_record->boot_state)
			error("Block %s in an error state while booting.",
			      bg_record->bg_block_id);
		else
			error("Block %s in an error state.",
			      bg_record->bg_block_id);
		remove_from_bg_list(bg_lists->booted, bg_record);
		trigger_block_error();
	} else if (real_state == BG_BLOCK_INITED) {
		if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
			list_push(bg_lists->booted, bg_record);
	}
	updated = 1;
nochange_state:

	/* check the boot state */
	debug3("boot state for block %s is %d",
	       bg_record->bg_block_id, bg_record->boot_state);
	if (bg_record->boot_state) {
		if (bg_record->state & BG_BLOCK_ERROR_FLAG) {
			/* If we get an error on boot that
			 * means it is a transparent L3 error
			 * and should be trying to fix
			 * itself.  If this is the case we
			 * just hang out waiting for the state
			 * to go to free where we will try to
			 * boot again below.
			 */
			return updated;
		}

		switch (real_state) {
		case BG_BLOCK_BOOTING:
			debug3("checking to make sure user %s "
			       "is the user.",
			       bg_record->target_name);

			if (update_block_user(bg_record, 0) == 1)
				last_bg_update = time(NULL);
			if (bg_record->job_ptr) {
				bg_record->job_ptr->job_state |=
					JOB_CONFIGURING;
				last_job_update = time(NULL);
			}
			break;
		case BG_BLOCK_FREE:
			if (bg_record->boot_count < RETRY_BOOT_COUNT) {
				bridge_block_boot(bg_record);

				if (bg_record->magic == BLOCK_MAGIC) {
					debug("boot count for block %s is %d",
					      bg_record->bg_block_id,
					      bg_record->boot_count);
					bg_record->boot_count++;
				}
			} else {
				char *reason = (char *)
					"status_check: Boot fails ";

				error("Couldn't boot Block %s for user %s",
				      bg_record->bg_block_id,
				      bg_record->target_name);

				slurm_mutex_unlock(&block_state_mutex);
				requeue_and_error(bg_record, reason);
				slurm_mutex_lock(&block_state_mutex);

				bg_record->boot_state = 0;
				bg_record->boot_count = 0;
				if (remove_from_bg_list(
					    bg_lists->job_running, bg_record)
				    == SLURM_SUCCESS)
					num_unused_cpus += bg_record->cpu_cnt;

				remove_from_bg_list(bg_lists->booted,
						    bg_record);
			}
			break;
		case BG_BLOCK_INITED:
			debug("block %s is ready.",
			      bg_record->bg_block_id);
			if (bg_record->job_ptr) {
				bg_record->job_ptr->job_state &=
					(~JOB_CONFIGURING);
				last_job_update = time(NULL);
			}
			/* boot flags are reset here */
			if (kill_job_list &&
			    set_block_user(bg_record) == SLURM_ERROR) {
				freeit = (kill_job_struct_t *)
					xmalloc(sizeof(kill_job_struct_t));
				freeit->jobid = bg_record->job_running;
				list_push(kill_job_list, freeit);
			}
			break;
		case BG_BLOCK_TERM:
			debug2("Block %s is in a deallocating state "
			       "during a boot.  Doing nothing until "
			       "free state.",
			       bg_record->bg_block_id);
			break;
		case BG_BLOCK_REBOOTING:
			debug2("Block %s is rebooting.",
			       bg_record->bg_block_id);
			break;
		default:
			debug("Hey the state of block "
			      "%s is %d(%s) doing nothing.",
			      bg_record->bg_block_id,
			      real_state,
			      bg_block_state_string(bg_record->state));
			break;
		}
	}

	return updated;
}
Example #3
0
/* block_state_mutex must be unlocked before calling this. */
extern int put_block_in_error_state(bg_record_t *bg_record, char *reason)
{
	uid_t pw_uid;

	xassert(bg_record);

	/* Only check this if the blocks are created, meaning this
	   isn't at startup.
	*/
	if (blocks_are_created) {
		/* Since we are putting this block in an error state we need
		   to wait for the job to be removed.  We don't really
		   need to free the block though since we may just
		   want it to be in an error state for some reason. */
		while (bg_record->job_running > NO_JOB_RUNNING) {
			if (bg_record->magic != BLOCK_MAGIC) {
				error("While putting block %s in a error "
				      "state it was destroyed",
				      bg_record->bg_block_id);
				return SLURM_ERROR;
			}
			debug2("block %s is still running job %d",
			       bg_record->bg_block_id, bg_record->job_running);
			sleep(1);
		}
	}

	slurm_mutex_lock(&block_state_mutex);
	if (!block_ptr_exist_in_list(bg_lists->main, bg_record)) {
		slurm_mutex_unlock(&block_state_mutex);
		error("while trying to put block in "
		      "error state it disappeared");
		return SLURM_ERROR;
	}

	info("Setting Block %s to ERROR state. (reason: '%s')",
	     bg_record->bg_block_id, reason);
	/* we add the block to these lists so we don't try to schedule
	   on them. */
	if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record)) {
		list_push(bg_lists->job_running, bg_record);
		num_unused_cpus -= bg_record->cpu_cnt;
	}
	if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
		list_push(bg_lists->booted, bg_record);

	bg_record->job_running = BLOCK_ERROR_STATE;
	bg_record->state |= BG_BLOCK_ERROR_FLAG;

	xfree(bg_record->user_name);
	xfree(bg_record->target_name);
	bg_record->user_name = xstrdup(bg_conf->slurm_user_name);
	bg_record->target_name = xstrdup(bg_conf->slurm_user_name);
	bg_record->reason = xstrdup(reason);

	if (uid_from_string (bg_record->user_name, &pw_uid) < 0)
		error("No such user: %s", bg_record->user_name);
	else
		bg_record->user_uid = pw_uid;

	/* Only send if reason is set.  If it isn't set then
	   accounting should already know about this error state */
	if (reason)
		_set_block_nodes_accounting(bg_record, reason);
	slurm_mutex_unlock(&block_state_mutex);

	trigger_block_error();
	return SLURM_SUCCESS;
}